diff options
Diffstat (limited to 'llvm')
231 files changed, 9206 insertions, 8666 deletions
diff --git a/llvm/docs/CommandGuide/llvm-cxxfilt.rst b/llvm/docs/CommandGuide/llvm-cxxfilt.rst index 0933f0b..3f7deb1 100644 --- a/llvm/docs/CommandGuide/llvm-cxxfilt.rst +++ b/llvm/docs/CommandGuide/llvm-cxxfilt.rst @@ -48,10 +48,6 @@ OPTIONS Print a summary of command line options. -.. option:: --no-params, -p - - Do not demangle function parameters or return types. - .. option:: --no-strip-underscore, -n Do not strip a leading underscore. This is the default for all platforms diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt index 576603c..51b3925 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS ExecutionEngine IRReader JITLink + OrcDebugging OrcJIT OrcShared OrcTargetProcess diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/LLJITWithRemoteDebugging.cpp b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/LLJITWithRemoteDebugging.cpp index 9001125..1f69415 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/LLJITWithRemoteDebugging.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/LLJITWithRemoteDebugging.cpp @@ -77,6 +77,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/Debugging/DebuggerSupport.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" @@ -174,32 +175,13 @@ int main(int argc, char *argv[]) { TSMs.push_back(ExitOnErr(parseExampleModuleFromFile(Path))); } - std::string TT; - StringRef MainModuleName; - TSMs.front().withModuleDo([&MainModuleName, &TT](Module &M) { - MainModuleName = M.getName(); - TT = M.getTargetTriple(); - if (TT.empty()) - TT = sys::getProcessTriple(); - }); - - // Create a target machine that matches the input triple. - JITTargetMachineBuilder JTMB((Triple(TT))); - JTMB.setCodeModel(CodeModel::Small); - JTMB.setRelocationModel(Reloc::PIC_); - // Create LLJIT and destroy it before disconnecting the target process. outs() << "Initializing LLJIT for remote executor\n"; - auto J = ExitOnErr(LLJITBuilder() - .setExecutorProcessControl(std::move(EPC)) - .setJITTargetMachineBuilder(std::move(JTMB)) - .setObjectLinkingLayerCreator([&](auto &ES, const auto &TT) { - return std::make_unique<ObjectLinkingLayer>(ES); - }) - .create()); + auto J = ExitOnErr( + LLJITBuilder().setExecutorProcessControl(std::move(EPC)).create()); // Add plugin for debug support. - ExitOnErr(addDebugSupport(J->getObjLinkingLayer())); + ExitOnErr(enableDebuggerSupport(*J)); // Load required shared libraries on the remote target and add a generator // for each of it, so the compiler can lookup their symbols. diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp index 49f5fcd..b11d875 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp @@ -27,22 +27,6 @@ using namespace llvm; using namespace llvm::orc; -Error addDebugSupport(ObjectLayer &ObjLayer) { - ExecutionSession &ES = ObjLayer.getExecutionSession(); - auto Registrar = createJITLoaderGDBRegistrar(ES); - if (!Registrar) - return Registrar.takeError(); - - auto *ObjLinkingLayer = cast<ObjectLinkingLayer>(&ObjLayer); - if (!ObjLinkingLayer) - return createStringError(inconvertibleErrorCode(), - "No debug support for given object layer type"); - - ObjLinkingLayer->addPlugin(std::make_unique<DebugObjectManagerPlugin>( - ES, std::move(*Registrar), true, true)); - return Error::success(); -} - Expected<std::unique_ptr<DefinitionGenerator>> loadDylib(ExecutionSession &ES, StringRef RemotePath) { if (auto Handle = ES.getExecutorProcessControl().loadDylib(RemotePath.data())) @@ -111,11 +95,15 @@ launchLocalExecutor(StringRef ExecutablePath) { close(FromExecutor[ReadEnd]); // Execute the child process. - std::unique_ptr<char[]> ExecPath, FDSpecifier; + std::unique_ptr<char[]> ExecPath, FDSpecifier, TestOutputFlag; { ExecPath = std::make_unique<char[]>(ExecutablePath.size() + 1); strcpy(ExecPath.get(), ExecutablePath.data()); + const char *TestOutputFlagStr = "test-jitloadergdb"; + TestOutputFlag = std::make_unique<char[]>(strlen(TestOutputFlagStr) + 1); + strcpy(TestOutputFlag.get(), TestOutputFlagStr); + std::string FDSpecifierStr("filedescs="); FDSpecifierStr += utostr(ToExecutor[ReadEnd]); FDSpecifierStr += ','; @@ -124,7 +112,8 @@ launchLocalExecutor(StringRef ExecutablePath) { strcpy(FDSpecifier.get(), FDSpecifierStr.c_str()); } - char *const Args[] = {ExecPath.get(), FDSpecifier.get(), nullptr}; + char *const Args[] = {ExecPath.get(), TestOutputFlag.get(), + FDSpecifier.get(), nullptr}; int RC = execvp(ExecPath.get(), Args); if (RC != 0) return make_error<StringError>( diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h index e7cad9f..3663307 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h @@ -36,8 +36,6 @@ launchLocalExecutor(llvm::StringRef ExecutablePath); llvm::Expected<std::unique_ptr<llvm::orc::SimpleRemoteEPC>> connectTCPSocket(llvm::StringRef NetworkAddress); -llvm::Error addDebugSupport(llvm::orc::ObjectLayer &ObjLayer); - llvm::Expected<std::unique_ptr<llvm::orc::DefinitionGenerator>> loadDylib(llvm::orc::ExecutionSession &ES, llvm::StringRef RemotePath); diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 4a952cc..8afe455 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -411,6 +411,7 @@ class AliasSetsPrinterPass : public PassInfoMixin<AliasSetsPrinterPass> { public: explicit AliasSetsPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 12dd9b0..96ae32d 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -189,6 +189,8 @@ public: explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// An immutable pass that tracks lazily created \c AssumptionCache diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h index 95d75b0..179fd06 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h @@ -134,6 +134,8 @@ public: explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c BlockFrequencyInfo. diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index fb02997..6b9d178 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -436,6 +436,8 @@ public: explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c BranchProbabilityInfo. diff --git a/llvm/include/llvm/Analysis/CFGSCCPrinter.h b/llvm/include/llvm/Analysis/CFGSCCPrinter.h index d980714..0ea0b46 100644 --- a/llvm/include/llvm/Analysis/CFGSCCPrinter.h +++ b/llvm/include/llvm/Analysis/CFGSCCPrinter.h @@ -19,6 +19,7 @@ class CFGSCCPrinterPass : public PassInfoMixin<CFGSCCPrinterPass> { public: explicit CFGSCCPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/CallGraph.h b/llvm/include/llvm/Analysis/CallGraph.h index 9413b39..8877437 100644 --- a/llvm/include/llvm/Analysis/CallGraph.h +++ b/llvm/include/llvm/Analysis/CallGraph.h @@ -322,6 +322,8 @@ public: explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Printer pass for the summarized \c CallGraphAnalysis results. @@ -333,6 +335,8 @@ public: explicit CallGraphSCCsPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// The \c ModulePass which wraps up a \c CallGraph and the logic to diff --git a/llvm/include/llvm/Analysis/CallPrinter.h b/llvm/include/llvm/Analysis/CallPrinter.h index d325d00..95cb5cc 100644 --- a/llvm/include/llvm/Analysis/CallPrinter.h +++ b/llvm/include/llvm/Analysis/CallPrinter.h @@ -24,12 +24,14 @@ class ModulePass; class CallGraphDOTPrinterPass : public PassInfoMixin<CallGraphDOTPrinterPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Pass for viewing the call graph class CallGraphViewerPass : public PassInfoMixin<CallGraphViewerPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; ModulePass *createCallGraphViewerPass(); diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h index 6491680..9b127c2 100644 --- a/llvm/include/llvm/Analysis/CostModel.h +++ b/llvm/include/llvm/Analysis/CostModel.h @@ -20,6 +20,8 @@ public: explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h index 099d761..ce939ef 100644 --- a/llvm/include/llvm/Analysis/CycleAnalysis.h +++ b/llvm/include/llvm/Analysis/CycleAnalysis.h @@ -68,6 +68,8 @@ public: explicit CycleInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index bc599cb..bd559f3 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -427,6 +427,7 @@ public: explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } private: raw_ostream &OS; diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h index d93c282..4aa154d 100644 --- a/llvm/include/llvm/Analysis/DDGPrinter.h +++ b/llvm/include/llvm/Analysis/DDGPrinter.h @@ -29,6 +29,7 @@ class DDGDotPrinterPass : public PassInfoMixin<DDGDotPrinterPass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } }; //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 95a36b8..a00adb2 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -140,6 +140,7 @@ struct DelinearizationPrinterPass : public PassInfoMixin<DelinearizationPrinterPass> { explicit DelinearizationPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } private: raw_ostream &OS; diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h index 6e4bfcf..aac7382 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -120,6 +120,8 @@ public: explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 327315f..f0a0964 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -994,6 +994,8 @@ namespace llvm { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } + private: raw_ostream &OS; bool NormalizeResults; diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index db0130e..b65cdc9 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -204,6 +204,8 @@ public: explicit DominanceFrontierPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index 3e9eb93..f5fbbdc 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -157,6 +157,8 @@ public: explicit FunctionPropertiesPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Correctly update FunctionPropertiesInfo post-inlining. A diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index ad137ba..0d19de6 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -1198,6 +1198,7 @@ class IRSimilarityAnalysisPrinterPass public: explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 2740106..5f36ee6 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -341,7 +341,7 @@ public: Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); } }; -/// Printer pass for the FunctionPropertiesAnalysis results. +/// Printer pass for the InlineAdvisorAnalysis results. class InlineAdvisorAnalysisPrinterPass : public PassInfoMixin<InlineAdvisorAnalysisPrinterPass> { raw_ostream &OS; @@ -353,6 +353,7 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + static bool isRequired() { return true; } }; std::unique_ptr<InlineAdvisor> diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 3f0bb87..3a760e0 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -343,6 +343,7 @@ struct InlineCostAnnotationPrinterPass public: explicit InlineCostAnnotationPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h index 0aae696..b44edd3 100644 --- a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h +++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h @@ -40,6 +40,8 @@ public: explicit InlineSizeEstimatorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // namespace llvm #endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index 211a058..68c98b4 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -1288,6 +1288,8 @@ public: explicit LazyCallGraphPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// A pass which prints the call graph as a DOT file to a \c raw_ostream. @@ -1301,6 +1303,8 @@ public: explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 25a2c9f..5611a2b 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -157,6 +157,8 @@ public: explicit LazyValueInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Wrapper around LazyValueInfo. diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index c9e853b..4fd2485 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -291,6 +291,8 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 3b10638..5208463 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -580,11 +580,13 @@ class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> { public: explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Verifier pass for the \c LoopAnalysis results. struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute loop information. diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 852a6c4..3b33dd5 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -217,6 +217,8 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MemDerefPrinter.h b/llvm/include/llvm/Analysis/MemDerefPrinter.h index bafdc54..ba376da 100644 --- a/llvm/include/llvm/Analysis/MemDerefPrinter.h +++ b/llvm/include/llvm/Analysis/MemDerefPrinter.h @@ -18,6 +18,7 @@ class MemDerefPrinterPass : public PassInfoMixin<MemDerefPrinterPass> { public: MemDerefPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 94d7f1a..caf0e31 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -953,6 +953,8 @@ public: : OS(OS), EnsureOptimizedUses(EnsureOptimizedUses) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Printer pass for \c MemorySSA via the walker. @@ -964,11 +966,14 @@ public: explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for \c MemorySSA. struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c MemorySSA. diff --git a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h index fa91e4f..e69db78 100644 --- a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h +++ b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h @@ -23,6 +23,7 @@ class ModuleDebugInfoPrinterPass public: explicit ModuleDebugInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 9c97bd1..468d94e 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -547,6 +547,7 @@ class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> { public: MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; class MustBeExecutedContextPrinterPass @@ -556,6 +557,7 @@ class MustBeExecutedContextPrinterPass public: MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/PhiValues.h b/llvm/include/llvm/Analysis/PhiValues.h index ecbb887..a749af3 100644 --- a/llvm/include/llvm/Analysis/PhiValues.h +++ b/llvm/include/llvm/Analysis/PhiValues.h @@ -132,6 +132,7 @@ class PhiValuesPrinterPass : public PassInfoMixin<PhiValuesPrinterPass> { public: explicit PhiValuesPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Wrapper pass for the legacy pass manager diff --git a/llvm/include/llvm/Analysis/PostDominators.h b/llvm/include/llvm/Analysis/PostDominators.h index 4383113..92e30f82 100644 --- a/llvm/include/llvm/Analysis/PostDominators.h +++ b/llvm/include/llvm/Analysis/PostDominators.h @@ -68,6 +68,8 @@ public: explicit PostDominatorTreePrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; struct PostDominatorTreeWrapperPass : public FunctionPass { diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index e49538b..73be9e1 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -389,6 +389,7 @@ class ProfileSummaryPrinterPass public: explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/RegionInfo.h b/llvm/include/llvm/Analysis/RegionInfo.h index 612b977..fc8df36 100644 --- a/llvm/include/llvm/Analysis/RegionInfo.h +++ b/llvm/include/llvm/Analysis/RegionInfo.h @@ -983,11 +983,14 @@ public: explicit RegionInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for the \c RegionInfo. struct RegionInfoVerifierPass : PassInfoMixin<RegionInfoVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; template <> diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 4f1237c..af3ad82 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -2246,6 +2246,7 @@ class ScalarEvolutionVerifierPass : public PassInfoMixin<ScalarEvolutionVerifierPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Printer pass for the \c ScalarEvolutionAnalysis results. @@ -2257,6 +2258,8 @@ public: explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; class ScalarEvolutionWrapperPass : public FunctionPass { diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index 7fd8836..438407f 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -190,6 +190,7 @@ public: StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type) : Type(Type), OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName); }; diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 751735f..2966f0c 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -105,6 +105,7 @@ class StackSafetyPrinterPass : public PassInfoMixin<StackSafetyPrinterPass> { public: explicit StackSafetyPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// StackSafetyInfo wrapper for the legacy pass manager @@ -143,6 +144,7 @@ class StackSafetyGlobalPrinterPass public: explicit StackSafetyGlobalPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// This pass performs the global (interprocedural) stack safety analysis diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 0eef17d..9f33c69 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -24,6 +24,8 @@ public: : OS(OS), EnableDetailedStructuralHash(Detailed) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/UniformityAnalysis.h b/llvm/include/llvm/Analysis/UniformityAnalysis.h index f42c495..c38d100 100644 --- a/llvm/include/llvm/Analysis/UniformityAnalysis.h +++ b/llvm/include/llvm/Analysis/UniformityAnalysis.h @@ -47,6 +47,8 @@ public: explicit UniformityInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes a \ref CycleInfo. diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index f7d362b..c726508 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -239,7 +239,8 @@ public: // Emit the bits with VBR encoding, NumBits-1 bits at a time. while (Val >= Threshold) { - Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Emit((Val & ((1U << (NumBits - 1)) - 1)) | (1U << (NumBits - 1)), + NumBits); Val >>= NumBits-1; } @@ -255,7 +256,8 @@ public: // Emit the bits with VBR encoding, NumBits-1 bits at a time. while (Val >= Threshold) { - Emit(((uint32_t)Val & ((1 << (NumBits - 1)) - 1)) | (1 << (NumBits - 1)), + Emit(((uint32_t)Val & ((1U << (NumBits - 1)) - 1)) | + (1U << (NumBits - 1)), NumBits); Val >>= NumBits-1; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 711ba10..916e6dd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -288,11 +288,14 @@ private: // Implements floating-point environment read/write via library function call. LegalizeResult createGetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); LegalizeResult createSetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); LegalizeResult createResetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); public: /// Return the alignment to use for a stack temporary object with the given @@ -440,13 +443,15 @@ public: LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC); + ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC, + LostDebugLocObserver &LocObserver, MachineInstr *MI = nullptr); /// Helper function that creates the given libcall. LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args); + ArrayRef<CallLowering::ArgInfo> Args, + LostDebugLocObserver &LocObserver, MachineInstr *MI = nullptr); /// Create a libcall to memcpy et al. LegalizerHelper::LegalizeResult diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index e51a3ec..9880e82 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -35,6 +35,7 @@ extern cl::opt<bool> DisableGISelLegalityCheck; class MachineFunction; class raw_ostream; class LegalizerHelper; +class LostDebugLocObserver; class MachineInstr; class MachineRegisterInfo; class MCInstrInfo; @@ -1288,8 +1289,8 @@ public: const MachineRegisterInfo &MRI) const; /// Called for instructions with the Custom LegalizationAction. - virtual bool legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { + virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { llvm_unreachable("must implement this if custom action is used"); } diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5c44538..7f95787 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -381,6 +381,7 @@ private: bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; + bool Disjoint : 1; bool NonNeg : 1; bool NoNaNs : 1; bool NoInfs : 1; @@ -402,10 +403,11 @@ private: public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NonNeg(false), - NoNaNs(false), NoInfs(false), NoSignedZeros(false), - AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), + Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false), + NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), + ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false), + Unpredictable(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -422,6 +424,7 @@ public: void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } void setNoSignedWrap(bool b) { NoSignedWrap = b; } void setExact(bool b) { Exact = b; } + void setDisjoint(bool b) { Disjoint = b; } void setNonNeg(bool b) { NonNeg = b; } void setNoNaNs(bool b) { NoNaNs = b; } void setNoInfs(bool b) { NoInfs = b; } @@ -437,6 +440,7 @@ public: bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } + bool hasDisjoint() const { return Disjoint; } bool hasNonNeg() const { return NonNeg; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } @@ -454,6 +458,7 @@ public: NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; + Disjoint &= Flags.Disjoint; NonNeg &= Flags.NonNeg; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h index fe12960..70cfc14 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -32,7 +32,7 @@ enum : int { /// Returns a non-NULL pointer to a NUL-terminated C style string /// that should be explicitly freed, if successful. Otherwise, may return /// nullptr if mangled_name is not a valid mangling or is nullptr. -char *itaniumDemangle(std::string_view mangled_name, bool ParseParams = true); +char *itaniumDemangle(std::string_view mangled_name); enum MSDemangleFlags { MSDF_None = 0, @@ -68,8 +68,7 @@ char *dlangDemangle(std::string_view MangledName); std::string demangle(std::string_view MangledName); bool nonMicrosoftDemangle(std::string_view MangledName, std::string &Result, - bool CanHaveLeadingDot = true, - bool ParseParams = true); + bool CanHaveLeadingDot = true); /// "Partial" demangler. This supports demangling a string into an AST /// (typically an intermediate stage in itaniumDemangle) and querying certain diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 06956f4..e0ff035 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2793,7 +2793,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { Node *parseClassEnumType(); Node *parseQualifiedType(); - Node *parseEncoding(bool ParseParams = true); + Node *parseEncoding(); bool parseCallOffset(); Node *parseSpecialName(); @@ -2910,7 +2910,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { Node *parseDestructorName(); /// Top-level entry point into the parser. - Node *parse(bool ParseParams = true); + Node *parse(); }; const char* parse_discriminator(const char* first, const char* last); @@ -5404,7 +5404,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() { // ::= <data name> // ::= <special-name> template <typename Derived, typename Alloc> -Node *AbstractManglingParser<Derived, Alloc>::parseEncoding(bool ParseParams) { +Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() { // The template parameters of an encoding are unrelated to those of the // enclosing context. SaveTemplateParams SaveTemplateParamsScope(this); @@ -5430,16 +5430,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseEncoding(bool ParseParams) { if (IsEndOfEncoding()) return Name; - // ParseParams may be false at the top level only, when called from parse(). - // For example in the mangled name _Z3fooILZ3BarEET_f, ParseParams may be - // false when demangling 3fooILZ3BarEET_f but is always true when demangling - // 3Bar. - if (!ParseParams) { - while (consume()) - ; - return Name; - } - Node *Attrs = nullptr; if (consumeIf("Ua9enable_ifI")) { size_t BeforeArgs = Names.size(); @@ -5904,9 +5894,9 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) { // extension ::= ___Z <encoding> _block_invoke<decimal-digit>+ // extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+ template <typename Derived, typename Alloc> -Node *AbstractManglingParser<Derived, Alloc>::parse(bool ParseParams) { +Node *AbstractManglingParser<Derived, Alloc>::parse() { if (consumeIf("_Z") || consumeIf("__Z")) { - Node *Encoding = getDerived().parseEncoding(ParseParams); + Node *Encoding = getDerived().parseEncoding(); if (Encoding == nullptr) return nullptr; if (look() == '.') { @@ -5920,7 +5910,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parse(bool ParseParams) { } if (consumeIf("___Z") || consumeIf("____Z")) { - Node *Encoding = getDerived().parseEncoding(ParseParams); + Node *Encoding = getDerived().parseEncoding(); if (Encoding == nullptr || !consumeIf("_block_invoke")) return nullptr; bool RequireNumber = consumeIf('_'); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h index 99175d7..9f91a64 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h @@ -16,6 +16,32 @@ #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include <cstdint> +// Keep in sync with gdb/gdb/jit.h +extern "C" { + +typedef enum { + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry { + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +}; + +struct jit_descriptor { + uint32_t version; + // This should be jit_actions_t, but we want to be specific about the + // bit-width. + uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; +}; +} + extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size); diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index 8784a42..42db4c4 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -293,11 +293,14 @@ public: explicit DominatorTreePrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for the \c DominatorTree. struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Enables verification of dominator trees. diff --git a/llvm/include/llvm/IR/SafepointIRVerifier.h b/llvm/include/llvm/IR/SafepointIRVerifier.h index 246d236..2ee998e 100644 --- a/llvm/include/llvm/IR/SafepointIRVerifier.h +++ b/llvm/include/llvm/IR/SafepointIRVerifier.h @@ -40,6 +40,8 @@ public: explicit SafepointIRVerifierPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index be85c40..1050f24 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -404,7 +404,9 @@ private: }; // Global mapping from mangled symbol names to resolutions. - StringMap<GlobalResolution> GlobalResolutions; + // Make this an optional to guard against accessing after it has been reset + // (to reduce memory after we're done with it). + std::optional<StringMap<GlobalResolution>> GlobalResolutions; void addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, ArrayRef<SymbolResolution> Res, unsigned Partition, diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index f28c1ed..5e704f0 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -244,6 +244,7 @@ def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>; def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>; def : GINodeEquiv<G_FENCE, atomic_fence>; +def : GINodeEquiv<G_PREFETCH, prefetch>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. // Should be used on defs that subclass GIComplexOperandMatcher<>. diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 4c29f25..1fe47de 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -362,7 +362,9 @@ public: virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const; /// Allow the target to modify the pass pipeline. - virtual void registerPassBuilderCallbacks(PassBuilder &) {} + // TODO: Populate all pass names by using <Target>PassRegistry.def. + virtual void registerPassBuilderCallbacks(PassBuilder &, + bool PopulateClassToPassNames) {} /// Allow the target to register alias analyses with the AAManager for use /// with the new pass manager. Only affects the "default" AAManager. diff --git a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h index 4136c45..6bc01ec 100644 --- a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h @@ -25,6 +25,7 @@ public: explicit IVUsersPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } }; } diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h index 4d1f934..f445e06 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h @@ -24,6 +24,7 @@ class LoopAccessInfoPrinterPass public: explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index b433d2e..365b215 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -215,11 +215,13 @@ class PredicateInfoPrinterPass public: explicit PredicateInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Verifier pass for \c PredicateInfo. struct PredicateInfoVerifierPass : PassInfoMixin<PredicateInfoVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index fb3a6f8..1b7277d 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -77,9 +78,15 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI, }; for (unsigned Idx = 0; Idx != CI->getNumOperandBundles(); Idx++) { - if (CI->getOperandBundleAt(Idx).Inputs.size() > ABA_WasOn && - CI->getOperandBundleAt(Idx).getTagName() != IgnoreBundleTag) - AddAffected(CI->getOperandBundleAt(Idx).Inputs[ABA_WasOn], Idx); + OperandBundleUse Bundle = CI->getOperandBundleAt(Idx); + if (Bundle.getTagName() == "separate_storage") { + assert(Bundle.Inputs.size() == 2 && + "separate_storage must have two args"); + AddAffected(getUnderlyingObject(Bundle.Inputs[0]), Idx); + AddAffected(getUnderlyingObject(Bundle.Inputs[1]), Idx); + } else if (Bundle.Inputs.size() > ABA_WasOn && + Bundle.getTagName() != IgnoreBundleTag) + AddAffected(Bundle.Inputs[ABA_WasOn], Idx); } Value *Cond = CI->getArgOperand(0), *A, *B; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3de1473..97f60d2 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -69,7 +69,7 @@ static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, cl::init(true)); static cl::opt<bool> EnableSeparateStorageAnalysis("basic-aa-separate-storage", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); /// SearchLimitReached / SearchTimes shows how often the limit of /// to decompose GEPs is reached. It will affect the precision @@ -1544,28 +1544,25 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, return AliasResult::NoAlias; if (CtxI && EnableSeparateStorageAnalysis) { - for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) + for (AssumptionCache::ResultElem &Elem : AC.assumptionsFor(O1)) { + if (!Elem || Elem.Index == AssumptionCache::ExprResultIdx) continue; - AssumeInst *Assume = cast<AssumeInst>(AssumeVH); - - for (unsigned Idx = 0; Idx < Assume->getNumOperandBundles(); Idx++) { - OperandBundleUse OBU = Assume->getOperandBundleAt(Idx); - if (OBU.getTagName() == "separate_storage") { - assert(OBU.Inputs.size() == 2); - const Value *Hint1 = OBU.Inputs[0].get(); - const Value *Hint2 = OBU.Inputs[1].get(); - // This is often a no-op; instcombine rewrites this for us. No-op - // getUnderlyingObject calls are fast, though. - const Value *HintO1 = getUnderlyingObject(Hint1); - const Value *HintO2 = getUnderlyingObject(Hint2); - - if (((O1 == HintO1 && O2 == HintO2) || - (O1 == HintO2 && O2 == HintO1)) && - isValidAssumeForContext(Assume, CtxI, DT)) - return AliasResult::NoAlias; - } + AssumeInst *Assume = cast<AssumeInst>(Elem); + OperandBundleUse OBU = Assume->getOperandBundleAt(Elem.Index); + if (OBU.getTagName() == "separate_storage") { + assert(OBU.Inputs.size() == 2); + const Value *Hint1 = OBU.Inputs[0].get(); + const Value *Hint2 = OBU.Inputs[1].get(); + // This is often a no-op; instcombine rewrites this for us. No-op + // getUnderlyingObject calls are fast, though. + const Value *HintO1 = getUnderlyingObject(Hint1); + const Value *HintO2 = getUnderlyingObject(Hint2); + + if (((O1 == HintO1 && O2 == HintO2) || + (O1 == HintO2 && O2 == HintO1)) && + isValidAssumeForContext(Assume, CtxI, DT)) + return AliasResult::NoAlias; } } } diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 35bdd86..1a9c7c21 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -95,14 +95,14 @@ bool ConstraintSystem::eliminateUsingFM() { IdxUpper++; } - if (MulOverflow(UpperV, ((-1) * LowerLast), M1)) + if (MulOverflow(UpperV, -1 * LowerLast, M1)) return false; if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) { LowerV = LowerRow[IdxLower].Coefficient; IdxLower++; } - if (MulOverflow(LowerV, (UpperLast), M2)) + if (MulOverflow(LowerV, UpperLast, M2)) return false; if (AddOverflow(M1, M2, N)) return false; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 78a8334..241bdd8 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2204,6 +2204,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) return Constant::getNullValue(Op0->getType()); + const APInt *C1; + Value *A; + // (A ^ C) & (A ^ ~C) -> 0 + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getNullValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL)) { // If Op0 is true implies Op1 is true, then Op0 is a subset of Op1. @@ -2473,6 +2480,11 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; + // (A ^ C) | (A ^ ~C) -> -1, i.e. all bits set to one. + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getAllOnesValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false)) { diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 87ddfe3..59c96a3 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -969,7 +969,9 @@ LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses LoopPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - AM.getResult<LoopAnalysis>(F).print(OS); + auto &LI = AM.getResult<LoopAnalysis>(F); + OS << "Loop info for function '" << F.getName() << "':\n"; + LI.print(OS); return PreservedAnalyses::all(); } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37e7153..7522353 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -149,7 +149,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI, return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); - return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize; + return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized + : UnableToLegalize; default: LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; @@ -567,7 +568,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(MachineInstr &MI, +static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, + MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI) { MachineBasicBlock &MBB = *MI.getParent(); @@ -596,17 +598,12 @@ static bool isLibCallInTailPosition(MachineInstr &MI, // RET_ReallyLR implicit $x0 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); if (Next != MBB.instr_end() && Next->isCopy()) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("unsupported opcode"); - case TargetOpcode::G_BZERO: + if (MI.getOpcode() == TargetOpcode::G_BZERO) return false; - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: - break; - } + // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the + // mempy/etc routines return the same parameter. For other it will be the + // returned value. Register VReg = MI.getOperand(0).getReg(); if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg()) return false; @@ -622,7 +619,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI, if (Ret->getNumImplicitOperands() != 1) return false; - if (PReg != Ret->getOperand(0).getReg()) + if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg()) return false; // Skip over the COPY that we just validated. @@ -639,34 +636,64 @@ LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args, - const CallingConv::ID CC) { + const CallingConv::ID CC, LostDebugLocObserver &LocObserver, + MachineInstr *MI) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); CallLowering::CallLoweringInfo Info; Info.CallConv = CC; Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = Result; + if (MI) + Info.IsTailCall = + (Result.Ty->isVoidTy() || + Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) && + isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(), + *MIRBuilder.getMRI()); + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; + if (MI && Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + + // Check debug locations before removing the return. + LocObserver.checkpoint(true); + + // We must have a return following the call (or debug insts) to get past + // isLibCallInTailPosition. + do { + MachineInstr *Next = MI->getNextNode(); + assert(Next && + (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) && + "Expected instr following MI to be return or debug inst?"); + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. + Next->eraseFromParent(); + } while (MI->getNextNode()); + + // We expect to lose the debug location from the return. + LocObserver.checkpoint(false); + } return LegalizerHelper::Legalized; } LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args) { + ArrayRef<CallLowering::ArgInfo> Args, + LostDebugLocObserver &LocObserver, MachineInstr *MI) { auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); - return createLibcall(MIRBuilder, Name, Result, Args, CC); + return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI); } // Useful for libcalls where all operands have the same type. static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { + Type *OpType, LostDebugLocObserver &LocObserver) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); // FIXME: What does the original arg index mean here? @@ -674,7 +701,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType, 0}, Args); + {MI.getOperand(0).getReg(), OpType, 0}, Args, + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -733,8 +761,9 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI); + Info.IsTailCall = + MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -789,11 +818,11 @@ static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, - Type *FromType) { + Type *FromType, LostDebugLocObserver &LocObserver) { RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); - return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), ToType, 0}, - {{MI.getOperand(1).getReg(), FromType, 0}}); + return createLibcall( + MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0}, + {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI); } static RTLIB::Libcall @@ -829,7 +858,8 @@ getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { // LegalizerHelper::LegalizeResult LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -850,7 +880,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, auto Res = createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); if (Res != LegalizerHelper::Legalized) return Res; @@ -867,7 +898,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, // content of memory region. LegalizerHelper::LegalizeResult LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -892,7 +924,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); } // The function is used to legalize operations that set default environment @@ -902,7 +935,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, // it is not true, the target must provide custom lowering. LegalizerHelper::LegalizeResult LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &Ctx = MF.getFunction().getContext(); @@ -919,7 +953,8 @@ LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0})); + CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -938,7 +973,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); Type *HLTy = IntegerType::get(Ctx, Size); - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); if (Status != Legalized) return Status; break; @@ -974,7 +1009,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); if (Status != Legalized) return Status; break; @@ -985,7 +1020,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg())); if (!FromTy || !ToTy) return UnableToLegalize; - LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy ); + LegalizeResult Status = + conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver); if (Status != Legalized) return Status; break; @@ -1000,7 +1036,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); + FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), + LocObserver); if (Status != Legalized) return Status; break; @@ -1015,7 +1052,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); + FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), + LocObserver); if (Status != Legalized) return Status; break; @@ -1032,19 +1070,20 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { return Result; } case TargetOpcode::G_GET_FPMODE: { - LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_SET_FPMODE: { - LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_RESET_FPMODE: { - LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = + createResetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; @@ -2831,6 +2870,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMINIMUM: diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 3fbb937..cbb1a74 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -305,11 +305,7 @@ namespace { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - /// - /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG - /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, - bool IsSubregToReg); + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1347,7 +1343,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (DstReg.isPhysical()) { Register NewDstReg = DstReg; - unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx); + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); if (NewDstIdx) NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); @@ -1496,7 +1493,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MRI->setRegClass(DstReg, NewRC); // Update machine operands and add flags. - updateRegDefsUses(DstReg, DstReg, DstIdx, false); + updateRegDefsUses(DstReg, DstReg, DstIdx); NewMI.getOperand(0).setSubReg(NewIdx); // updateRegDefUses can add an "undef" flag to the definition, since // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make @@ -1816,7 +1813,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx, bool IsSubregToReg) { + unsigned SubIdx) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1856,8 +1853,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); - bool FullDef = true; - // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); @@ -1865,13 +1860,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice // versa. - if (SubIdx && MO.isDef()) { + if (SubIdx && MO.isDef()) MO.setIsUndef(!Reads); - if (!Reads) - FullDef = false; - } - // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. if (MO.isUse() && !DstIsPhys) { @@ -1903,25 +1894,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } - if (IsSubregToReg && !FullDef) { - // If the coalesed instruction doesn't fully define the register, we need - // to preserve the original super register liveness for SUBREG_TO_REG. - // - // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, - // but it introduces liveness for other subregisters. Downstream users may - // have been relying on those bits, so we need to ensure their liveness is - // captured with a def of other lanes. - - // FIXME: Need to add new subrange if tracking subranges. We could also - // skip adding this if we knew the other lanes are dead, and only for - // other lanes. - - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "this should update subranges"); - MachineInstrBuilder MIB(*MF, UseMI); - MIB.addReg(DstReg, RegState::ImplicitDefine); - } - LLVM_DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugInstr()) @@ -2121,8 +2093,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } - const bool IsSubregToReg = CopyMI->isSubregToReg(); - ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2190,12 +2160,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) { - assert(!IsSubregToReg && "can this happen?"); - updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false); - } - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), - IsSubregToReg); + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eafa95c..464e1be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7987,7 +7987,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // If OR can be rewritten into ADD, try combines based on ADD. if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) + DAG.isADDLike(SDValue(N, 0))) if (SDValue Combined = visitADDLike(N)) return Combined; @@ -10055,7 +10055,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Shl0.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); + SDNodeFlags Flags; + // Preserve the disjoint flag for Or. + if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint()) + Flags.setDisjoint(true); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e17bba..4151964 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5022,7 +5022,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: case ISD::AND: - case ISD::OR: case ISD::XOR: case ISD::ROTL: case ISD::ROTR: @@ -5062,6 +5061,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()); + // Matches hasPoisonGeneratingFlags(). + case ISD::OR: + return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::INSERT_VECTOR_ELT:{ // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); @@ -5085,7 +5088,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool SelectionDAG::isADDLike(SDValue Op) const { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::OR) - return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); + return Op->getFlags().hasDisjoint() || + haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); if (Opcode == ISD::XOR) return isMinSignedConstant(Op.getOperand(1)); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3c4b285..192f7bc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3354,6 +3354,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { } if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) Flags.setExact(ExactOp->isExact()); + if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(&I)) + Flags.setDisjoint(DisjointOp->isDisjoint()); if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) Flags.copyFMF(*FPOp); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 78cc600..4ae3000 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -597,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; + if (getFlags().hasDisjoint()) + OS << " disjoint"; + if (getFlags().hasNonNeg()) OS << " nneg"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c597754..66cdd75 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue ResultAVG = - DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), - DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); - return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, - ResultAVG); + DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT), + DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT)); + return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT); } /// Look at Op. At this point, we know that only the OriginalDemandedBits of the @@ -1468,14 +1467,24 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::OR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - + SDNodeFlags Flags = Op.getNode()->getFlags(); if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, - Known2, TLO, Depth + 1)) + Known2, TLO, Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp index 117b849..83f3cdc 100644 --- a/llvm/lib/Demangle/Demangle.cpp +++ b/llvm/lib/Demangle/Demangle.cpp @@ -47,8 +47,7 @@ static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); } static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); } bool llvm::nonMicrosoftDemangle(std::string_view MangledName, - std::string &Result, bool CanHaveLeadingDot, - bool ParseParams) { + std::string &Result, bool CanHaveLeadingDot) { char *Demangled = nullptr; // Do not consider the dot prefix as part of the demangled symbol name. @@ -58,7 +57,7 @@ bool llvm::nonMicrosoftDemangle(std::string_view MangledName, } if (isItaniumEncoding(MangledName)) - Demangled = itaniumDemangle(MangledName, ParseParams); + Demangled = itaniumDemangle(MangledName); else if (isRustEncoding(MangledName)) Demangled = rustDemangle(MangledName); else if (isDLangEncoding(MangledName)) diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index 5c21b06..e3f208f 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -366,13 +366,13 @@ public: using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>; -char *llvm::itaniumDemangle(std::string_view MangledName, bool ParseParams) { +char *llvm::itaniumDemangle(std::string_view MangledName) { if (MangledName.empty()) return nullptr; Demangler Parser(MangledName.data(), MangledName.data() + MangledName.length()); - Node *AST = Parser.parse(ParseParams); + Node *AST = Parser.parse(); if (!AST) return nullptr; diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index 8eca874..8a4145a 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -21,31 +21,8 @@ // First version as landed in August 2009 static constexpr uint32_t JitDescriptorVersion = 1; -// Keep in sync with gdb/gdb/jit.h extern "C" { -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the // debugger checks the version before we can set it during runtime. diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index eab05ee..c6dc42e 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) { if (F.hasFnAttribute(Attribute::OptimizeNone)) return /*Changed*/ false; + // FIXME: https://github.com/llvm/llvm-project/issues/76545 + if (F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return /*Changed*/ false; + bool Changed = false; auto *DL = &F.getParent()->getDataLayout(); // Collect a map of {backing storage : dbg.declares} (currently "backing diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 05836fd..6a1e53b 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -592,7 +592,9 @@ LTO::LTO(Config Conf, ThinBackend Backend, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) : Conf(std::move(Conf)), RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), - ThinLTO(std::move(Backend)), LTOMode(LTOMode) {} + ThinLTO(std::move(Backend)), + GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()), + LTOMode(LTOMode) {} // Requires a destructor for MapVector<BitcodeModule>. LTO::~LTO() = default; @@ -610,7 +612,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, assert(ResI != ResE); SymbolResolution Res = *ResI++; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; + auto &GlobalRes = (*GlobalResolutions)[Sym.getName()]; GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); if (Res.Prevailing) { assert(!GlobalRes.Prevailing && @@ -1125,7 +1127,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions; - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // Normally resolution have IR name of symbol. We can do nothing here // otherwise. See comments in GlobalResolution struct for more details. if (Res.second.IRName.empty()) @@ -1169,6 +1171,8 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { Error Result = runRegularLTO(AddStream); if (!Result) + // This will reset the GlobalResolutions optional once done with it to + // reduce peak memory before importing. Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); if (StatsFile) @@ -1273,8 +1277,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary); + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; // If allowed, upgrade public vcall visibility metadata to linkage unit @@ -1291,7 +1295,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); if (!Conf.CodeGenOnly) { - for (const auto &R : GlobalResolutions) { + for (const auto &R : *GlobalResolutions) { GlobalValue *GV = RegularLTO.CombinedModule->getNamedValue(R.second.IRName); if (!R.second.isPrevailingIRSymbol()) @@ -1708,8 +1712,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; @@ -1739,15 +1743,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing); } - if (Conf.OptLevel > 0) - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - isPrevailing, ImportLists, ExportLists); - // Figure out which symbols need to be internalized. This also needs to happen // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. if (Res.second.Partition != GlobalResolution::External || @@ -1760,6 +1760,16 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ExportedGUIDs.insert(GUID); } + // Reset the GlobalResolutions to deallocate the associated memory, as there + // are no further accesses. We specifically want to do this before computing + // cross module importing, which adds to peak memory via the computed import + // and export lists. + GlobalResolutions.reset(); + + if (Conf.OptLevel > 0) + ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + isPrevailing, ImportLists, ExportLists); + // Any functions referenced by the jump table in the regular LTO object must // be exported. for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs()) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index a85182a..9dae026 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -704,8 +704,14 @@ static void AttemptToFoldSymbolOffsetDifference( } int64_t Num; + unsigned Count; if (DF) { Displacement += DF->getContents().size(); + } else if (auto *AF = dyn_cast<MCAlignFragment>(FI); + AF && Layout && + !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( + *AF, Count)) { + Displacement += Asm->computeFragmentSize(*Layout, *AF); } else if (auto *FF = dyn_cast<MCFillFragment>(FI); FF && FF->getNumValues().evaluateAsAbsolute(Num)) { Displacement += Num * FF->getValueSize(); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f94bd42..439f749 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -452,9 +452,10 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, std::optional<PGOOptions> PGOOpt, PassInstrumentationCallbacks *PIC) : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) { + bool ShouldPopulateClassToPassNames = PIC && shouldPopulateClassToPassNames(); if (TM) - TM->registerPassBuilderCallbacks(*this); - if (PIC && shouldPopulateClassToPassNames()) { + TM->registerPassBuilderCallbacks(*this, ShouldPopulateClassToPassNames); + if (ShouldPopulateClassToPassNames) { #define MODULE_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 9b8162c..1ea63a5 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -1231,15 +1231,6 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, // Only extend the RHS within the instruction if there is a valid extend type. if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && isValueAvailable(RHS)) { - if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) - if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) - if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { - Register RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, - C->getZExtValue(), SetFlags, WantResult); - } Register RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb63d87..10ad5b1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -12586,6 +12586,7 @@ def : TokenAlias<".4S", ".4s">; def : TokenAlias<".2D", ".2d">; def : TokenAlias<".1Q", ".1q">; def : TokenAlias<".2H", ".2h">; +def : TokenAlias<".2B", ".2b">; def : TokenAlias<".B", ".b">; def : TokenAlias<".H", ".h">; def : TokenAlias<".S", ".s">; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 738a52e..380f6e1f 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -810,7 +810,7 @@ defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } -let Predicates = [HasSME2p1, HasB16B16] in { +let Predicates = [HasSME2, HasB16B16] in { defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 1d0e8be..bb5f1f6 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -989,6 +989,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, s16, 8) .lower(); + // For fmul reductions we need to split up into individual operations. We + // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of + // smaller types, followed by scalarizing what remains. + getActionDefinitionsBuilder(G_VECREDUCE_FMUL) + .minScalarOrElt(0, MinFPScalar) + .clampMaxNumElements(1, s64, 2) + .clampMaxNumElements(1, s32, 4) + .clampMaxNumElements(1, s16, 8) + .clampMaxNumElements(1, s32, 2) + .clampMaxNumElements(1, s16, 4) + .scalarize(1) + .lower(); + getActionDefinitionsBuilder(G_VECREDUCE_ADD) .legalFor({{s8, v16s8}, {s8, v8s8}, @@ -1137,8 +1150,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) verify(*ST.getInstrInfo()); } -bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); GISelChangeObserver &Observer = Helper.Observer; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 19f77ba..e96ec6d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -28,7 +28,8 @@ class AArch64LegalizerInfo : public LegalizerInfo { public: AArch64LegalizerInfo(const AArch64Subtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index b755254..789ec81 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10082,6 +10082,12 @@ multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDP def : InstAlias<mnemonic # "\t$Pd, $Zn", (!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _H) PPR16:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _S) PPR32:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _D) PPR64:$Pd, ZPRAny:$Zn, 0), 0>; // any_lane def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))), @@ -10143,6 +10149,12 @@ multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp, def : InstAlias<mnemonic # "\t$Zd, $Pn", (!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, 0, PPR16:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, 0, PPR32:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, 0, PPR64:$Pn), 0>; // Merge def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 88ef4b5..ad8dcda 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2764,7 +2764,9 @@ static bool isConstant(const MachineInstr &MI) { void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { - const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); + unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1; + const MachineInstr *PtrMI = + MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg()); assert(PtrMI); @@ -2817,6 +2819,10 @@ bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; + if (MI.getOpcode() == AMDGPU::G_PREFETCH) + return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() == + AMDGPU::SGPRRegBankID; + const Instruction *I = dyn_cast<Instruction>(Ptr); return I && I->getMetadata("amdgpu.uniform"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fbee288..dfbe5c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1996,8 +1996,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, verify(*ST.getInstrInfo()); } -bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AMDGPULegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &B = Helper.MIRBuilder; MachineRegisterInfo &MRI = *B.getMRI(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 855fa0d..1fa0648 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -35,7 +35,8 @@ public: AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index fba0604..92182ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3263,17 +3263,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } - unsigned PtrBank = - getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID); + Register PtrReg = MI.getOperand(0).getReg(); + unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID); if (PtrBank == AMDGPU::VGPRRegBankID) { MI.eraseFromParent(); return; } - // FIXME: There is currently no support for prefetch in global isel. - // There is no node equivalence and what's worse there is no MMO produced - // for a prefetch on global isel path. - // Prefetch does not affect execution so erase it for now. - MI.eraseFromParent(); + unsigned AS = MRI.getType(PtrReg).getAddressSpace(); + if (!AMDGPU::isFlatGlobalAddrSpace(AS) && + AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + MI.eraseFromParent(); + return; + } + applyDefaultMapping(OpdMapper); return; } default: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fdc2077..0f3bb3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -620,7 +620,8 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<AMDGPUAA>(); } -void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void AMDGPUTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [this](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 9051a61..99c9db3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -51,7 +51,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; void registerDefaultAliasAnalyses(AAManager &) override; /// Get the integer value of a null pointer in the given address space. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index abd7e91..5f2b7c0 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -166,6 +166,8 @@ public: ImmTyEndpgm, ImmTyWaitVDST, ImmTyWaitEXP, + ImmTyWaitVAVDst, + ImmTyWaitVMVSrc, }; // Immediate operand kind. @@ -909,6 +911,8 @@ public: bool isEndpgm() const; bool isWaitVDST() const; bool isWaitEXP() const; + bool isWaitVAVDst() const; + bool isWaitVMVSrc() const; auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { return std::bind(P, *this); @@ -1029,6 +1033,7 @@ public: } static void printImmTy(raw_ostream& OS, ImmTy Type) { + // clang-format off switch (Type) { case ImmTyNone: OS << "None"; break; case ImmTyGDS: OS << "GDS"; break; @@ -1086,7 +1091,10 @@ public: case ImmTyEndpgm: OS << "Endpgm"; break; case ImmTyWaitVDST: OS << "WaitVDST"; break; case ImmTyWaitEXP: OS << "WaitEXP"; break; + case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; + case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; } + // clang-format on } void print(raw_ostream &OS) const override { @@ -9192,6 +9200,14 @@ bool AMDGPUOperand::isWaitVDST() const { return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); } +bool AMDGPUOperand::isWaitVAVDst() const { + return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm()); +} + +bool AMDGPUOperand::isWaitVMVSrc() const { + return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm()); +} + //===----------------------------------------------------------------------===// // VINTERP //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td new file mode 100644 index 0000000..54ef785 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -0,0 +1,191 @@ +//===-- DSDIRInstructions.td - LDS/VDS Direct Instruction Definitions -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+) +//===----------------------------------------------------------------------===// + +class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23-22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +class VDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + bits<1> waitvsrc; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23} = waitvsrc; + let Inst{22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR Classes +//===----------------------------------------------------------------------===// + +class LDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_vdst:$waitvdst), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) + ); +} + +class VDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_va_vdst:$waitvdst, wait_va_vsrc:$waitvsrc), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_va_vdst:$waitvdst, + wait_va_vsrc:$waitvsrc) + ); +} + +class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : + InstSI<(outs VGPR_32:$vdst), ins, asm> { + let LDSDIR = 1; + let EXP_CNT = 1; + + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; + + string Mnemonic = opName; + let UseNamedOperandTable = 1; + + let Uses = [M0, EXEC]; + let DisableWQM = 0; + let SchedRW = [WriteLDS]; + + bit is_direct; + let is_direct = direct; +} + +class DSDIR_Pseudo<string opName, dag ins, bit direct> : + DSDIR_Common<opName, "", ins, direct>, + SIMCInstr<opName, SIEncodingFamily.NONE> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class LDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst", + " $vdst, $attr$attrchan$waitvdst" + ); +} + +class VDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst$waitvsrc", + " $vdst, $attr$attrchan$waitvdst$waitvsrc" + ); +} + +class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> : + DSDIR_Common<lds.Mnemonic, + lds.Mnemonic # asm, + ins, + lds.is_direct>, + SIMCInstr <lds.Mnemonic, subtarget> { + let isPseudo = 0; + let isCodeGenOnly = 0; +} + +//===----------------------------------------------------------------------===// +// LDS/VDS Direct Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Only in { + +def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>; +def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (LDS_DIRECT_LOAD 0) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) +>; + +} // End SubtargetPredicate = isGFX11Only + +let SubtargetPredicate = isGFX12Plus in { + +def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>; +def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (DS_DIRECT_LOAD 0, 1) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1) +>; + +} // End SubtargetPredicate = isGFX12Only + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx11<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx11 : DSDIR_Real<lds, lds.InOperandList, + LDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX11>, + LDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX11Only; + let DecoderNamespace = "GFX11"; + } +} + +defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>; +defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>; + +//===----------------------------------------------------------------------===// +// GFX12+ +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx12<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx12 : DSDIR_Real<lds, lds.InOperandList, + VDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX12>, + VDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX12Plus; + let DecoderNamespace = "GFX12"; + } +} + +defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>; +defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>; diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td deleted file mode 100644 index 4956a15..0000000 --- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td +++ /dev/null @@ -1,116 +0,0 @@ -//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// LDSDIR encoding -//===----------------------------------------------------------------------===// - -class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { - // encoding fields - bits<2> attrchan; - bits<6> attr; - bits<4> waitvdst; - bits<8> vdst; - - // encoding - let Inst{31-24} = 0xce; // encoding - let Inst{23-22} = 0x0; // reserved - let Inst{21-20} = op; - let Inst{19-16} = waitvdst; - let Inst{15-10} = !if(is_direct, ?, attr); - let Inst{9-8} = !if(is_direct, ?, attrchan); - let Inst{7-0} = vdst; -} - -//===----------------------------------------------------------------------===// -// LDSDIR Classes -//===----------------------------------------------------------------------===// - -class LDSDIR_getIns<bit direct> { - dag ret = !if(direct, - (ins wait_vdst:$waitvdst), - (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) - ); -} - -class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI< - (outs VGPR_32:$vdst), - LDSDIR_getIns<direct>.ret, - asm> { - let LDSDIR = 1; - let EXP_CNT = 1; - - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 0; - - string Mnemonic = opName; - let UseNamedOperandTable = 1; - - let Uses = [M0, EXEC]; - let DisableWQM = 0; - let SchedRW = [WriteLDS]; - - bit is_direct; - let is_direct = direct; -} - -class LDSDIR_Pseudo<string opName, bit direct> : - LDSDIR_Common<opName, "", direct>, - SIMCInstr<opName, SIEncodingFamily.NONE> { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - -class LDSDIR_getAsm<bit direct> { - string ret = !if(direct, - " $vdst$waitvdst", - " $vdst, $attr$attrchan$waitvdst" - ); -} - -class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> : - LDSDIR_Common<lds.Mnemonic, - lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret, - lds.is_direct>, - SIMCInstr <lds.Mnemonic, subtarget>, - LDSDIRe<op, lds.is_direct> { - let isPseudo = 0; - let isCodeGenOnly = 0; -} - -//===----------------------------------------------------------------------===// -// LDS Direct Instructions -//===----------------------------------------------------------------------===// - -def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>; -def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>; - -def : GCNPat < - (f32 (int_amdgcn_lds_direct_load M0)), - (LDS_DIRECT_LOAD 0) ->; - -def : GCNPat < - (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), - (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) ->; - -//===----------------------------------------------------------------------===// -// GFX11+ -//===----------------------------------------------------------------------===// - -multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> { - def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> { - let AssemblerPredicate = isGFX11Plus; - let DecoderNamespace = "GFX11"; - } -} - -defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>; -defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index edc244d..ef1b85f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -639,6 +639,20 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, printU4ImmDecOperand(MI, OpNo, O); } +void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_va_vdst:"; + printU4ImmDecOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_vm_vsrc:"; + printU4ImmDecOperand(MI, OpNo, O); +} + void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 95c26de..f2f985f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -161,6 +161,10 @@ private: raw_ostream &O); void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, unsigned N); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 173c877..50724fd 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1144,6 +1144,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">; def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">; def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">; +def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">; +def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">; class KImmFPOperand<ValueType vt> : ImmOperand<vt> { let OperandNamespace = "AMDGPU"; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8310c6b..0f12727 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -30,7 +30,7 @@ include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" include "EXPInstructions.td" -include "LDSDIRInstructions.td" +include "DSDIRInstructions.td" include "VINTERPInstructions.td" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 59d6ccf..5e6c349 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } continue; } else if (Opcode == AMDGPU::LDS_PARAM_LOAD || - Opcode == AMDGPU::LDS_DIRECT_LOAD) { + Opcode == AMDGPU::DS_PARAM_LOAD || + Opcode == AMDGPU::LDS_DIRECT_LOAD || + Opcode == AMDGPU::DS_DIRECT_LOAD) { // Mark these STRICTWQM, but only for the instruction, not its operands. // This avoid unnecessarily marking M0 as requiring WQM. InstrInfo &II = Instructions[&MI]; diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 3297847..be21cf0 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -977,20 +977,35 @@ def : GCNPat < } } // let OtherPredicates = [HasShaderCyclesRegister] -multiclass SMPrefetchPat<string type, int cache_type> { +def i32imm_zero : TImmLeaf <i32, [{ + return Imm == 0; +}]>; + +def i32imm_one : TImmLeaf <i32, [{ + return Imm == 1; +}]>; + +multiclass SMPrefetchPat<string type, TImmLeaf cache_type> { def : GCNPat < - (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)), + (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0)) >; def : GCNPat < - (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)), + (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0)) >; + + def : GCNPat < + (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type), + (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) + (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)), + 0, (i32 SGPR_NULL), (i8 0)) + >; } -defm : SMPrefetchPat<"INST", 0>; -defm : SMPrefetchPat<"DATA", 1>; +defm : SMPrefetchPat<"INST", i32imm_zero>; +defm : SMPrefetchPat<"DATA", i32imm_one>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 3ffde86..abea0fe 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -362,8 +362,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, llvm_unreachable("Unsupported size for FCmp predicate"); } -bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; @@ -392,7 +392,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, OriginalResult}; auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0}, {{MI.getOperand(1).getReg(), ArgTy, 0}, - {MI.getOperand(2).getReg(), ArgTy, 0}}); + {MI.getOperand(2).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; break; @@ -428,7 +429,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, auto Status = createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy, 0}, {{MI.getOperand(2).getReg(), ArgTy, 0}, - {MI.getOperand(3).getReg(), ArgTy, 0}}); + {MI.getOperand(3).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/llvm/lib/Target/ARM/ARMLegalizerInfo.h index f1c2e9c..3636cc6 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -28,7 +28,8 @@ class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; private: void setFCmpLibcallsGNU(); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index ab0db576..8973684 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -108,7 +108,8 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { return new BPFPassConfig(*this, PM); } -void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void BPFTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &FPM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 4e6adc7..0a28394 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -42,7 +42,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h index 8ffa1d7..bce4116 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h +++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h @@ -36,6 +36,7 @@ class DXILResourcePrinterPass : public PassInfoMixin<DXILResourcePrinterPass> { public: explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute DXIL resource diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index d5cb488..06938f8 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -100,7 +100,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT, DirectXTargetMachine::~DirectXTargetMachine() {} -void DirectXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void DirectXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h index d04c375..428beaf 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.h +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h @@ -47,7 +47,8 @@ public: } TargetTransformInfo getTargetTransformInfo(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } // namespace llvm diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 590e464..e7a692d 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -274,7 +274,8 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void HexagonTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerLateLoopOptimizationsEPCallback( [=](LoopPassManager &LPM, OptimizationLevel Level) { LPM.addPass(HexagonLoopIdiomRecognitionPass()); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index dddd79a..c5fed0c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -34,7 +34,8 @@ public: ~HexagonTargetMachine() override; const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 14f2620..f5e9423 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -330,8 +330,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { verify(*ST.getInstrInfo()); } -bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool MipsLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/llvm/lib/Target/Mips/MipsLegalizerInfo.h index 05027b7..63daebf 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.h +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.h @@ -25,7 +25,8 @@ class MipsLegalizerInfo : public LegalizerInfo { public: MipsLegalizerInfo(const MipsSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 8d89576..fad69f5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -225,7 +225,8 @@ void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<NVPTXAA>(); } -void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void NVPTXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index cfdd8da..9e6bf92 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -69,7 +69,8 @@ public: void registerDefaultAliasAnalyses(AAManager &AAM) override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 079906d..61bae58 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -411,8 +411,9 @@ bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, return true; } -bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool RISCVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; GISelChangeObserver &Observer = Helper.Observer; switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 48c3697..4335bd0 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -30,7 +30,8 @@ class RISCVLegalizerInfo : public LegalizerInfo { public: RISCVLegalizerInfo(const RISCVSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03a59f8..27bb69d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, + ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -12850,9 +12850,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add | add_vl -> vwadd(u) | vwadd(u)_w -/// sub | sub_vl -> vwsub(u) | vwsub(u)_w -/// mul | mul_vl -> vwmul(u) | vwmul_su +/// add_vl -> vwadd(u) | vwadd(u)_w +/// sub_vl -> vwsub(u) | vwsub(u)_w +/// mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. @@ -12897,8 +12897,6 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12915,8 +12913,7 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, + SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12931,10 +12928,8 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + auto [Mask, VL] = getMaskAndVL(Root); switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12974,15 +12969,12 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; - case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -12995,8 +12987,7 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && - "SU is only supported for MUL"); + assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -13004,10 +12995,8 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; - case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13017,33 +13006,19 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/, SelectionDAG &, - const RISCVSubtarget &)>; + const NodeExtensionHelper & /*RHS*/)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. - void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - unsigned Opc = OrigOperand.getOpcode(); - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: { - if (OrigOperand.getValueType().isVector()) { - SupportsZExt = Opc == ISD::ZERO_EXTEND; - SupportsSExt = Opc == ISD::SIGN_EXTEND; - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - break; - } + switch (OrigOperand.getOpcode()) { case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13099,16 +13074,8 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. - static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { + static bool isSupportedRoot(const SDNode *Root) { switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isTypeLegal(Root->getValueType(0))) - return false; - return Root->getValueType(0).isScalableVector(); - } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13123,10 +13090,9 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { + assert(isSupportedRoot(Root) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13142,7 +13108,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); + std::tie(Mask, VL) = getMaskAndVL(Root); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13151,7 +13117,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG, Subtarget); + fillUpExtensionSupport(Root, DAG); break; } } @@ -13167,27 +13133,14 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. - static std::pair<SDValue, SDValue> - getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Unexpected root"); - switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - default: - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); - } + static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { + assert(isSupportedRoot(Root) && "Unexpected root"); + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + bool areVLAndMaskCompatible(const SDNode *Root) const { + auto [Mask, VL] = getMaskAndVL(Root); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13195,14 +13148,11 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { - case ISD::ADD: - case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13247,25 +13197,14 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. - SDValue materialize(SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { + SDValue materialize(SelectionDAG &DAG) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = - NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); - switch (Root->getOpcode()) { - default: - Merge = Root->getOperand(2); - break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - Merge = DAG.getUNDEF(Root->getValueType(0)); - break; - } + std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); + Merge = Root->getOperand(2); return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), - Merge, Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, + Mask, VL); } }; @@ -13282,16 +13221,15 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + bool AllowZExt) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, + /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13308,10 +13246,9 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13320,9 +13257,8 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + const NodeExtensionHelper &RHS) { + if (!RHS.areVLAndMaskCompatible(Root)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13346,10 +13282,9 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false, DAG, Subtarget); + /*AllowZExt=*/false); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13358,10 +13293,9 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13370,13 +13304,10 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - + const NodeExtensionHelper &RHS) { if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13386,8 +13317,6 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13395,7 +13324,6 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; - case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13426,14 +13354,12 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const RISCVSubtarget &Subtarget) { +static SDValue +combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) - return SDValue(); - + assert(NodeExtensionHelper::isSupportedRoot(N) && + "Shouldn't have called this method"); SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13442,11 +13368,11 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) + if (!NodeExtensionHelper::isSupportedRoot(Root)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG, Subtarget); - NodeExtensionHelper RHS(N, 1, DAG, Subtarget); + NodeExtensionHelper LHS(N, 0, DAG); + NodeExtensionHelper RHS(N, 1, DAG); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13473,8 +13399,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = - FoldingStrategy(N, LHS, RHS, DAG, Subtarget); + std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13503,7 +13428,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG, Subtarget); + SDValue NewValue = Res.materialize(DAG); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14775,20 +14700,13 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - - assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); - - if (N->getValueType(0).isFixedLengthVector()) - return SDValue(); - + assert(N->getOpcode() == RISCVISD::ADD_VL); SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); + SDValue AddMergeOp = N->getOperand(2); - if (N->getOpcode() == RISCVISD::ADD_VL) { - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); - } + if (!AddMergeOp.isUndef()) + return SDValue(); auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14812,16 +14730,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (N->getOpcode() == ISD::ADD) { - SDLoc DL(N); - return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, - Subtarget); - } - return std::make_pair(N->getOperand(3), N->getOperand(4)); - }(N, DAG, Subtarget); - + SDValue AddMask = N->getOperand(3); + SDValue AddVL = N->getOperand(4); SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -15087,18 +14997,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; - if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) - return V; + case ISD::ADD: return performADDCombine(N, DAG, Subtarget); - } - case ISD::SUB: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; + case ISD::SUB: return performSUBCombine(N, DAG, Subtarget); - } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15106,8 +15008,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15584,7 +15484,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15593,7 +15493,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); + return combineBinOp_VLToVWBinOp_VL(N, DCI); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index faaf7f0..061bc96 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -289,8 +289,9 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType, return ConvReg; } -bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool SPIRVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { auto Opc = MI.getOpcode(); MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); if (!isTypeFoldingSupported(Opc)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h index 2541ff2..f18b15b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h @@ -29,7 +29,8 @@ class SPIRVLegalizerInfo : public LegalizerInfo { SPIRVGlobalRegistry *GR; public: - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; SPIRVLegalizerInfo(const SPIRVSubtarget &ST); }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1e4b136..cd56529 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40221,6 +40221,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::SHUF128: { + // If we're permuting the upper 256-bits subvectors of a concatenation, then + // see if we can peek through and access the subvector directly. + if (VT.is512BitVector()) { + // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the + // upper subvector is used. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Mask = N->getConstantOperandVal(2); + SmallVector<SDValue> LHSOps, RHSOps; + SDValue NewLHS, NewRHS; + if ((Mask & 0x0A) == 0x0A && + collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) { + NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0x0A; + } + if ((Mask & 0xA0) == 0xA0 && + collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) { + NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0xA0; + } + if (NewLHS || NewRHS) + return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS, + NewRHS ? NewRHS : RHS, + DAG.getTargetConstant(Mask, DL, MVT::i8)); + } + return SDValue(); + } case X86ISD::VPERM2X128: { // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). SDValue LHS = N->getOperand(0); @@ -54572,6 +54600,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT()) return Op0.getOperand(0); } + + // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x)) + if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() && + !X86::mayFoldLoad(Op0.getOperand(0), Subtarget)) + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + Op0.getOperand(0), Op0.getOperand(0)), + Op0.getOperand(1)); } // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128. @@ -54979,6 +55015,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); } break; + case X86ISD::BLENDI: + if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) { + uint64_t Mask0 = Ops[0].getConstantOperandVal(2); + uint64_t Mask1 = Ops[1].getConstantOperandVal(2); + uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0; + MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Sel = + DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT)); + return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1), + ConcatSubOperand(VT, Ops, 0)); + } + break; case ISD::VSELECT: if (!IsSplat && Subtarget.hasAVX512() && (VT.is256BitVector() || diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 6b0c1b8..08f5a88 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -350,212 +350,212 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + def 8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def 16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def 16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def 64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def 8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def 16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def 64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def 8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; - def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def 16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def 64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def 8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + def 8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def 16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def 16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def 64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def 8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; - def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + def 8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def 16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def 64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def 8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def 16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def 32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def 64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { let Predicates = [NoNDD] in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + def 16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def 8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def 16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def 32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def 64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; - def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; - def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; - def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; - def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def 16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def 32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def 64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; + def 16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def 16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; - def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; - def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; - def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + def 16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def 64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def 8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def 16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def 64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def 16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def 16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def 64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; } } - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; - def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; - def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; - def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; - def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + def 8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def 16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def 32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def 64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def 16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def 64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def 8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; + def 8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; - def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; - def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def 8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; } let Predicates = [In64BitMode] in { - def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; - def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + def 16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def 8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def 16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def 64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -571,162 +571,162 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; - def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + def 16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def 32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def 64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; } } } // isCommutable let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } - def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; - def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; - def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + def 8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def 16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def 32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def 64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; } let Predicates = [In64BitMode] in { - def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; - def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; - def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; - def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + def 8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def 16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def 32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def 64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; } let Predicates = [NoNDD] in { - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; + def 8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; + def 16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def 32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def 64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } } let Predicates = [In64BitMode] in { - def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; - def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + def 8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def 16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def 64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; } - def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } let Predicates = [In64BitMode] in { - def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; + def 16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; - def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def 16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; } let Predicates = [In64BitMode] in { - def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; - def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + def 8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def 16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def 16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def 64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -739,71 +739,71 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; - def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; } - def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; + def 64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; let mayLoad = 1 in - def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; + def 8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; } - def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 305bd74..772ed2a 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1212,36 +1212,33 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>; } -multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, - RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched, string Suffix = ""> { -let hasSideEffects = 0 in { - def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched]>; - let mayLoad = 1 in - def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched.Folded]>; -} +multiclass Bls<string m, Format RegMRM, Format MemMRM, X86TypeInfo t, string Suffix = ""> { + let SchedRW = [WriteBLS] in { + def rr#Suffix : UnaryOpR<0xF3, RegMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; + } + + let SchedRW = [WriteBLS.Folded] in + def rm#Suffix : UnaryOpM<0xF3, MemMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; } -let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W; +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32>, VEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64>, VEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32>, VEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64>, VEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32>, VEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; +let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32, "_EVEX">, EVEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64, "_EVEX">, EVEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32, "_EVEX">, EVEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64, "_EVEX">, EVEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32, "_EVEX">, EVEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX; } let Predicates = [HasBMI] in { @@ -1281,50 +1278,35 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDPatternOperator OpNode, - PatFrag ld_frag, X86FoldableSchedWrite Sched, - string Suffix = ""> { - def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>, - T8, VEX, Sched<[Sched]>; -let mayLoad = 1 in - def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)), - (implicit EFLAGS)]>, T8, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; +multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + X86FoldableSchedWrite sched, string Suffix = ""> { + let SchedRW = [sched], Form = MRMSrcReg4VOp3 in + def rr#Suffix : BinOpRR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, T8; + let SchedRW = [sched.Folded, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + sched.ReadAfterFold], Form = MRMSrcMem4VOp3 in + def rm#Suffix : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), + t.RegClass:$src2))]>, T8; } let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR>, REX_W; + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR>, VEX; } let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI>; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI>, REX_W; -} -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W; -} -let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W; + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI>, VEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI>, VEX; +} +let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR, "_EVEX">, EVEX; +} +let Predicates = [HasBMI2, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI, "_EVEX">, EVEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI, "_EVEX">, EVEX; } def CountTrailingOnes : SDNodeXForm<imm, [{ diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 2e08c7b..32941c0 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1524,7 +1524,8 @@ StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. uint32_t processor_id; __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); - switch (processor_id & 0xff00) { + // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. + switch (processor_id & 0xf000) { case 0xc000: // Loongson 64bit, 4-issue return "la464"; // TODO: Others. diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 529f730..89a1ad2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2953,6 +2953,9 @@ void coro::salvageDebugInfo( std::optional<BasicBlock::iterator> InsertPt; if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. if (!OptimizeFrame && I->getDebugLoc()) DVI.setDebugLoc(I->getDebugLoc()); } else if (isa<Argument>(Storage)) @@ -2988,9 +2991,14 @@ void coro::salvageDebugInfo( // dbg.declare does. if (DPV.getType() == DPValue::LocationType::Declare) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. + if (!OptimizeFrame && I->getDebugLoc()) + DPV.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) { DPV.removeFromParent(); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 49ac1e9..cc93c86 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -635,7 +644,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, // First try to look up \p V in Value2Index and NewVariables. Otherwise add a // new entry to NewVariables. - DenseMap<Value *, unsigned> NewIndexMap; + SmallDenseMap<Value *, unsigned> NewIndexMap; auto GetOrAddIndex = [&Value2Index, &NewVariables, &NewIndexMap](Value *V) -> unsigned { auto V2I = Value2Index.find(V); @@ -659,7 +668,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, IsSigned, IsEq, IsNe); // Collect variables that are known to be positive in all uses in the // constraint. - DenseMap<Value *, bool> KnownNonNegativeVariables; + SmallDenseMap<Value *, bool> KnownNonNegativeVariables; auto &R = Res.Coefficients; for (const auto &KV : VariablesA) { R[GetOrAddIndex(KV.Variable)] += KV.Coefficient; diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 0990c75..ea31356 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -33,37 +33,37 @@ STATISTIC(NumVFDeclAdded, STATISTIC(NumCompUsedAdded, "Number of `@llvm.compiler.used` operands that have been added."); -/// A helper function that adds the vector function declaration that -/// vectorizes the CallInst CI with a vectorization factor of VF -/// lanes. The TLI assumes that all parameters and the return type of -/// CI (other than void) need to be widened to a VectorType of VF -/// lanes. +/// A helper function that adds the vector variant declaration for vectorizing +/// the CallInst \p CI with a vectorization factor of \p VF lanes. For each +/// mapping, TLI provides a VABI prefix, which contains all information required +/// to create vector function declaration. static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, - bool Predicate, const StringRef VFName) { + const VecDesc *VD) { Module *M = CI.getModule(); + FunctionType *ScalarFTy = CI.getFunctionType(); - // Add function declaration. - Type *RetTy = ToVectorTy(CI.getType(), VF); - SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - assert(!CI.getFunctionType()->isVarArg() && - "VarArg functions are not supported."); - if (Predicate) - Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF)); - FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); - Function *VectorF = - Function::Create(FTy, Function::ExternalLinkage, VFName, M); - VectorF->copyAttributesFrom(CI.getCalledFunction()); + assert(!ScalarFTy->isVarArg() && "VarArg functions are not supported."); + + const std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI( + VD->getVectorFunctionABIVariantString(), ScalarFTy); + + assert(Info && "Failed to demangle vector variant"); + assert(Info->Shape.VF == VF && "Mangled name does not match VF"); + + const StringRef VFName = VD->getVectorFnName(); + FunctionType *VectorFTy = VFABI::createFunctionType(*Info, ScalarFTy); + Function *VecFunc = + Function::Create(VectorFTy, Function::ExternalLinkage, VFName, M); + VecFunc->copyAttributesFrom(CI.getCalledFunction()); ++NumVFDeclAdded; LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName - << "` of type " << *(VectorF->getType()) << "\n"); + << "` of type " << *VectorFTy << "\n"); // Make function declaration (without a body) "sticky" in the IR by // listing it in the @llvm.compiler.used intrinsic. - assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` " + assert(!VecFunc->size() && "VFABI attribute requires `@llvm.compiler.used` " "only on declarations."); - appendToCompilerUsed(*M, {VectorF}); + appendToCompilerUsed(*M, {VecFunc}); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName << "` to `@llvm.compiler.used`.\n"); ++NumCompUsedAdded; @@ -100,7 +100,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { } Function *VariantF = M->getFunction(VD->getVectorFnName()); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName()); + addVariantDeclaration(CI, VF, VD); } }; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 55e3756..61d891d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { } static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, + bool RemoveOrigDefaultBlock = true) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); + if (RemoveOrigDefaultBlock) + OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); @@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, if (DTU) { SmallVector<DominatorTree::UpdateType, 2> Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) + if (RemoveOrigDefaultBlock && + !is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } @@ -5609,10 +5612,28 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, Known.getBitWidth() - (Known.Zero | Known.One).popcount(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && - SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI, DTU); - return true; + NumUnknownBits < 64 /* avoid overflow */) { + uint64_t AllNumCases = 1ULL << NumUnknownBits; + if (SI->getNumCases() == AllNumCases) { + createUnreachableSwitchDefault(SI, DTU); + return true; + } + // When only one case value is missing, replace default with that case. + // Eliminating the default branch will provide more opportunities for + // optimization, such as lookup tables. + if (SI->getNumCases() == AllNumCases - 1) { + assert(NumUnknownBits > 1 && "Should be canonicalized to a branch"); + uint64_t MissingCaseVal = 0; + for (const auto &Case : SI->cases()) + MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue(); + auto *MissingCase = + cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal)); + SwitchInstProfUpdateWrapper SIW(*SI); + SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0)); + createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false); + SIW.setSuccessorWeight(0, 0); + return true; + } } if (DeadCases.empty()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 577ce80..150ab30 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -167,9 +167,14 @@ public: } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "") { - return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, - Name); + DebugLoc DL, const Twine &Name = "", + std::optional<FastMathFlags> FMFs = std::nullopt) { + auto *Select = + FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + *FMFs, DL, Name) + : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + DL, Name); + return tryInsertInstruction(Select); } /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8e135d8..f5f0461 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9141,7 +9141,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + auto *NewExitingVPV = PhiR->getBackedgeValue(); // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9151,21 +9151,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPValue *Red = PhiR->getBackedgeValue(); assert(Red->getDefiningRecipe()->getParent() != LatchVPBB && "reduction recipe must be defined before latch"); - FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - Result = + std::optional<FastMathFlags> FMFs = PhiTy->isFloatingPointTy() - ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) - : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Result->insertBefore(&*Builder.getInsertPoint()); - Red->replaceUsesWithIf( - Result->getVPSingleValue(), - [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); + ? std::make_optional(RdxDesc.getFastMathFlags()) + : std::nullopt; + NewExitingVPV = Builder.createSelect(Cond, Red, PhiR, {}, "", FMFs); + Red->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { + return isa<VPLiveOut>(&U); + }); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Result->getVPSingleValue()); + PhiR->setOperand(1, NewExitingVPV); } // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the @@ -9174,17 +9173,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, - Result->getVPSingleValue(), RdxTy); + auto *Trunc = + new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); auto *Extnd = RdxDesc.isSigned() ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); - Trunc->insertAfter(Result); + Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); Extnd->insertAfter(Trunc); - Result->getVPSingleValue()->replaceAllUsesWith(Extnd); - Trunc->setOperand(0, Result->getVPSingleValue()); + NewExitingVPV->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, NewExitingVPV); } } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 30499152..bd89ec0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11139,6 +11139,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); + if (const TreeEntry *TE = getTreeEntry(V)) + V = TE->VectorizedValue; setInsertPointAfterBundle(E); V = FinalShuffle(V, E, VecTy, IsSigned); E->VectorizedValue = V; diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 77254b7..db50b6d 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -391,8 +391,17 @@ function(runtime_register_target name) add_dependencies(runtimes-test-depends runtimes-test-depends-${name}) endif() foreach(runtime_name ${runtime_names}) + if(NOT TARGET ${runtime_name}) + add_custom_target(${runtime_name}) + endif() add_dependencies(${runtime_name} ${runtime_name}-${name}) + if(NOT TARGET install-${runtime_name}) + add_custom_target(install-${runtime_name}) + endif() add_dependencies(install-${runtime_name} install-${runtime_name}-${name}) + if(NOT TARGET install-${runtime_name}-stripped) + add_custom_target(install-${runtime_name}-stripped) + endif() add_dependencies(install-${runtime_name}-stripped install-${runtime_name}-${name}-stripped) endforeach() foreach(component ${LLVM_RUNTIME_DISTRIBUTION_COMPONENTS}) @@ -459,11 +468,6 @@ if(runtimes) add_custom_target(runtimes-test-depends) set(test_targets "") endif() - foreach(runtime_name ${RUNTIME_NAMES}) - add_custom_target(${runtime_name}) - add_custom_target(install-${runtime_name}) - add_custom_target(install-${runtime_name}-stripped) - endforeach() if(LLVM_RUNTIME_DISTRIBUTION_COMPONENTS) foreach(component ${LLVM_RUNTIME_DISTRIBUTION_COMPONENTS}) add_custom_target(${component}) diff --git a/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll b/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll new file mode 100644 index 0000000..afe3602 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/inttoptr_constexpr.ll @@ -0,0 +1,49 @@ +; RUN: opt -passes=aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s + +; CHECK: NoAlias: i8* %a, i8* %gep +define void @inttoptr_alloca() { + %a = alloca i8 + %a.int = ptrtoint ptr %a to i64 + %a.int.1 = add i64 %a.int, 1 + %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %a.int.1 + %load = load i8, ptr %gep + store i8 1, ptr %a + ret void +} + +; CHECK: NoAlias: i8* %a, i8* %gep +define void @inttoptr_alloca_unknown_relation(i64 %offset) { + %a = alloca i8 + %a.int = ptrtoint ptr %a to i64 + %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset + %load = load i8, ptr %gep + store i8 1, ptr %a + ret void +} + +; CHECK: NoAlias: i8* %a, i8* %gep +define void @inttoptr_alloca_noescape(i64 %offset) { + %a = alloca i8 + %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset + %load = load i8, ptr %gep + store i8 1, ptr %a + ret void +} + +; CHECK: NoAlias: i8* %a, i8* %gep +define void @inttoptr_noalias(ptr noalias %a) { + %a.int = ptrtoint ptr %a to i64 + %a.int.1 = add i64 %a.int, 1 + %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %a.int.1 + %load = load i8, ptr %gep + store i8 1, ptr %a + ret void +} + +; CHECK: NoAlias: i8* %a, i8* %gep +define void @inttoptr_noalias_noescape(ptr noalias %a, i64 %offset) { + %gep = getelementptr i8, ptr inttoptr (i64 -1 to ptr), i64 %offset + %load = load i8, ptr %gep + store i8 1, ptr %a + ret void +} diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll index d96ff49..24ed59c 100644 --- a/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll +++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-simple.ll @@ -33,5 +33,5 @@ for.end: !7 = distinct !{!7, !9} ; LoopID !9 = !{!"llvm.loop.parallel_accesses", !6} - -; CHECK: Parallel Loop +; CHECK: Loop info for function 'func': +; CHECK: Parallel Loop at depth 1 containing: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 8529dd3..4c07081 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -13,7 +13,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -47,13 +47,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew) + ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -93,7 +93,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -249,7 +249,7 @@ define i32 @fetch_and_nand(ptr %p) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0 ; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) @@ -302,7 +302,7 @@ define i32 @fetch_and_or(ptr %p) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p) + ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p) ; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 @@ -735,8 +735,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -761,7 +761,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -785,8 +785,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -810,8 +810,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -835,8 +835,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -860,8 +860,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -885,10 +885,10 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -912,10 +912,10 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -940,10 +940,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 7 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -968,10 +968,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 7 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -995,8 +995,8 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1021,7 +1021,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1045,8 +1045,8 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1070,8 +1070,8 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1095,8 +1095,8 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1120,8 +1120,8 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1145,10 +1145,10 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1172,10 +1172,10 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1200,10 +1200,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 15 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1228,10 +1228,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 15 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1257,7 +1257,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK-NEXT: liveins: $w1, $x2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 @@ -1300,7 +1300,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK-NEXT: liveins: $w1, $x2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir index 14c53902..2d72e5f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpmode.mir @@ -79,11 +79,8 @@ body: | ; CHECK-LABEL: name: func_reset ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s64) - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: $x0 = COPY [[INTTOPTR]](p0) - ; CHECK-NEXT: BL &fesetmode, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: TCRETURNdi &fesetmode, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0 G_RESET_FPMODE RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir new file mode 100644 index 0000000..9d9e96d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fmul.mir @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name: mul_2H +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: mul_2H + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FMUL]](<4 x s32>) + ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s32>) = G_FMUL [[UV]], [[UV1]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMUL1]](<2 x s32>) + ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FMUL2]](s32) + ; CHECK-NEXT: $s0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %1:_(<4 x s32>) = COPY $q0 + %2:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>) + %5:_(s32) = nnan ninf nsz arcp contract afn reassoc G_VECREDUCE_FMUL %0(<8 x s32>) + $s0 = COPY %5(s32) + RET_ReallyLR implicit $s0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 178db85..866fe6f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -736,8 +736,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll index f853c08..ac08825 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function icmp_i8_shift_and_cmp --version 4 ; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define i32 @icmp_eq_imm(i32 %a) nounwind ssp { @@ -257,3 +258,18 @@ entry: %conv2 = zext i1 %cmp to i32 ret i32 %conv2 } + +define i32 @icmp_i8_shift_and_cmp(i8 %a, i8 %b) { +entry: +; CHECK-LABEL: icmp_i8_shift_and_cmp: +; CHECK: ubfiz [[REG1:w[0-9]+]], w0, #3, #5 +; CHECK-NEXT: sxtb [[REG0:w[0-9]+]], w1 +; CHECK-NEXT: cmp [[REG0]], [[REG1]], sxtb +; CHECK-NEXT: cset [[REG:w[0-9]+]], eq +; CHECK-NEXT: and w0, [[REG]], #0x1 + %op = shl i8 %a, 3 + %cmp = icmp eq i8 %b, %op + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll index e287eff..dda610e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1392,6 +1392,24 @@ define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) { ret <8 x i8> %result } +define <4 x i16> @ext_via_i19(<4 x i16> %a) { +; CHECK-LABEL: ext_via_i19: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: uaddw.4s v0, v1, v0 +; CHECK-NEXT: uhadd.4s v0, v0, v1 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: ret + %t3 = zext <4 x i16> %a to <4 x i32> + %t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1> + %t5 = trunc <4 x i32> %t4 to <4 x i19> + %new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1> + %new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1> + %last = zext <4 x i19> %new1 to <4 x i32> + %t6 = trunc <4 x i32> %last to <4 x i16> + ret <4 x i16> %t6 +} + declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll index 26c0b68..2848a6b 100644 --- a/llvm/test/CodeGen/AArch64/fexplog.ll +++ b/llvm/test/CodeGen/AArch64/fexplog.ll @@ -3,36 +3,18 @@ ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define double @exp_f64(double %a) { -; CHECK-SD-LABEL: exp_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b exp -; -; CHECK-GI-LABEL: exp_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl exp -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: exp_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b exp entry: %c = call double @llvm.exp.f64(double %a) ret double %c } define float @exp_f32(float %a) { -; CHECK-SD-LABEL: exp_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b expf -; -; CHECK-GI-LABEL: exp_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: exp_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b expf entry: %c = call float @llvm.exp.f32(float %a) ret float %c @@ -1280,36 +1262,18 @@ entry: } define double @exp2_f64(double %a) { -; CHECK-SD-LABEL: exp2_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b exp2 -; -; CHECK-GI-LABEL: exp2_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl exp2 -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: exp2_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b exp2 entry: %c = call double @llvm.exp2.f64(double %a) ret double %c } define float @exp2_f32(float %a) { -; CHECK-SD-LABEL: exp2_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b exp2f -; -; CHECK-GI-LABEL: exp2_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: exp2_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b exp2f entry: %c = call float @llvm.exp2.f32(float %a) ret float %c @@ -2557,36 +2521,18 @@ entry: } define double @log_f64(double %a) { -; CHECK-SD-LABEL: log_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b log -; -; CHECK-GI-LABEL: log_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl log -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b log entry: %c = call double @llvm.log.f64(double %a) ret double %c } define float @log_f32(float %a) { -; CHECK-SD-LABEL: log_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b logf -; -; CHECK-GI-LABEL: log_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b logf entry: %c = call float @llvm.log.f32(float %a) ret float %c @@ -3834,36 +3780,18 @@ entry: } define double @log2_f64(double %a) { -; CHECK-SD-LABEL: log2_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b log2 -; -; CHECK-GI-LABEL: log2_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl log2 -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log2_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b log2 entry: %c = call double @llvm.log2.f64(double %a) ret double %c } define float @log2_f32(float %a) { -; CHECK-SD-LABEL: log2_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b log2f -; -; CHECK-GI-LABEL: log2_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log2_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b log2f entry: %c = call float @llvm.log2.f32(float %a) ret float %c @@ -5111,36 +5039,18 @@ entry: } define double @log10_f64(double %a) { -; CHECK-SD-LABEL: log10_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b log10 -; -; CHECK-GI-LABEL: log10_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl log10 -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log10_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b log10 entry: %c = call double @llvm.log10.f64(double %a) ret double %c } define float @log10_f32(float %a) { -; CHECK-SD-LABEL: log10_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b log10f -; -; CHECK-GI-LABEL: log10_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: log10_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b log10f entry: %c = call float @llvm.log10.f32(float %a) ret float %c diff --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll index a4f2c4f..ebfb069 100644 --- a/llvm/test/CodeGen/AArch64/fpmode.ll +++ b/llvm/test/CodeGen/AArch64/fpmode.ll @@ -63,11 +63,8 @@ define void @func_reset_fpmode_soft() #0 { ; ; GIS-LABEL: func_reset_fpmode_soft: ; GIS: // %bb.0: // %entry -; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GIS-NEXT: mov x0, #-1 // =0xffffffffffffffff -; GIS-NEXT: bl fesetmode -; GIS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GIS-NEXT: ret +; GIS-NEXT: b fesetmode entry: call void @llvm.reset.fpmode() ret void diff --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll index 7681ab5..a55c0db 100644 --- a/llvm/test/CodeGen/AArch64/fpow.ll +++ b/llvm/test/CodeGen/AArch64/fpow.ll @@ -3,36 +3,18 @@ ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define double @pow_f64(double %a, double %b) { -; CHECK-SD-LABEL: pow_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b pow -; -; CHECK-GI-LABEL: pow_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl pow -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: pow_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b pow entry: %c = call double @llvm.pow.f64(double %a, double %b) ret double %c } define float @pow_f32(float %a, float %b) { -; CHECK-SD-LABEL: pow_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b powf -; -; CHECK-GI-LABEL: pow_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: pow_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b powf entry: %c = call float @llvm.pow.f32(float %a, float %b) ret float %c diff --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll index ed0f6c4..eb26128 100644 --- a/llvm/test/CodeGen/AArch64/frem.ll +++ b/llvm/test/CodeGen/AArch64/frem.ll @@ -5,36 +5,18 @@ ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define double @frem_f64(double %a, double %b) { -; CHECK-SD-LABEL: frem_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b fmod -; -; CHECK-GI-LABEL: frem_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl fmod -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: frem_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b fmod entry: %c = frem double %a, %b ret double %c } define float @frem_f32(float %a, float %b) { -; CHECK-SD-LABEL: frem_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b fmodf -; -; CHECK-GI-LABEL: frem_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: frem_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b fmodf entry: %c = frem float %a, %b ret float %c diff --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll index 9784b9e..aef0b2e 100644 --- a/llvm/test/CodeGen/AArch64/fsincos.ll +++ b/llvm/test/CodeGen/AArch64/fsincos.ll @@ -3,36 +3,18 @@ ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define double @sin_f64(double %a) { -; CHECK-SD-LABEL: sin_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b sin -; -; CHECK-GI-LABEL: sin_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl sin -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sin_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b sin entry: %c = call double @llvm.sin.f64(double %a) ret double %c } define float @sin_f32(float %a) { -; CHECK-SD-LABEL: sin_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b sinf -; -; CHECK-GI-LABEL: sin_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sin_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b sinf entry: %c = call float @llvm.sin.f32(float %a) ret float %c @@ -1280,36 +1262,18 @@ entry: } define double @cos_f64(double %a) { -; CHECK-SD-LABEL: cos_f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b cos -; -; CHECK-GI-LABEL: cos_f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl cos -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cos_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b cos entry: %c = call double @llvm.cos.f64(double %a) ret double %c } define float @cos_f32(float %a) { -; CHECK-SD-LABEL: cos_f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: b cosf -; -; CHECK-GI-LABEL: cos_f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GI-NEXT: ret +; CHECK-LABEL: cos_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b cosf entry: %c = call float @llvm.cos.f32(float %a) ret float %c @@ -2556,6 +2520,24 @@ entry: ret <16 x half> %c } +; This is testing that we do not produce incorrect tailcall lowerings +define i64 @donttailcall(double noundef %x, double noundef %y) { +; CHECK-LABEL: donttailcall: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sin +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call double @llvm.sin.f64(double noundef %x) + %0 = bitcast double %call to i64 + ret i64 %0 +} + + declare <16 x half> @llvm.cos.v16f16(<16 x half>) declare <16 x half> @llvm.sin.v16f16(<16 x half>) declare <2 x double> @llvm.cos.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll index c3f14b0..8e6b15c 100644 --- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll +++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll @@ -298,18 +298,9 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { } define float @exp10_f32(float %x) { -; SDAG-LABEL: exp10_f32: -; SDAG: // %bb.0: -; SDAG-NEXT: b exp10f -; -; GISEL-LABEL: exp10_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISEL-NEXT: .cfi_def_cfa_offset 16 -; GISEL-NEXT: .cfi_offset w30, -16 -; GISEL-NEXT: bl exp10f -; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISEL-NEXT: ret +; CHECK-LABEL: exp10_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: b exp10f %r = call float @llvm.exp10.f32(float %x) ret float %r } @@ -541,18 +532,9 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { } define double @exp10_f64(double %x) { -; SDAG-LABEL: exp10_f64: -; SDAG: // %bb.0: -; SDAG-NEXT: b exp10 -; -; GISEL-LABEL: exp10_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; GISEL-NEXT: .cfi_def_cfa_offset 16 -; GISEL-NEXT: .cfi_offset w30, -16 -; GISEL-NEXT: bl exp10 -; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; GISEL-NEXT: ret +; CHECK-LABEL: exp10_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: b exp10 %r = call double @llvm.exp10.f64(double %x) ret double %r } diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll index a202bfb..1f7584b 100644 --- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll +++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s +; RUN: llc -o - -mtriple=arm64e-apple-macosx %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +; x16 is not available, shrink-wrapping cannot happen because +; StoreSwiftAsyncContext needs it. define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_x16_clobbered: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: add x16, sp, #8 ; CHECK-NEXT: movk x16, #49946, lsl #48 @@ -18,83 +20,52 @@ define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr s ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x9, LJTI0_0@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: add x9, x9, LJTI0_0@PAGEOFF -; CHECK-NEXT: Ltmp0: -; CHECK-NEXT: adr x10, Ltmp0 -; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] -; CHECK-NEXT: add x10, x10, x11 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x10 -; CHECK-NEXT: LBB0_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB0_3 -; CHECK-NEXT: LBB0_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB0_3: ; %exit +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: cbnz x8, LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB0_2: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI0_0: -; CHECK-NEXT: .long LBB0_3-Ltmp0 -; CHECK-NEXT: .long LBB0_1-Ltmp0 -; CHECK-NEXT: .long LBB0_1-Ltmp0 -; CHECK-NEXT: .long LBB0_2-Ltmp0 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x16 = tail call i64 asm "", "={x16}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x16}"(i64 %x16) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } +; x17 is not available, shrink-wrapping cannot happen because +; StoreSwiftAsyncContext needs it. define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_x17_clobbered: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: add x16, sp, #8 ; CHECK-NEXT: movk x16, #49946, lsl #48 @@ -105,86 +76,61 @@ define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr s ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x9, LJTI1_0@PAGE -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: add x9, x9, LJTI1_0@PAGEOFF -; CHECK-NEXT: Ltmp1: -; CHECK-NEXT: adr x10, Ltmp1 -; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] -; CHECK-NEXT: add x10, x10, x11 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x10 -; CHECK-NEXT: LBB1_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB1_3 -; CHECK-NEXT: LBB1_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB1_3: ; %exit +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: cbnz x8, LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB1_2: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI1_0: -; CHECK-NEXT: .long LBB1_3-Ltmp1 -; CHECK-NEXT: .long LBB1_1-Ltmp1 -; CHECK-NEXT: .long LBB1_1-Ltmp1 -; CHECK-NEXT: .long LBB1_2-Ltmp1 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x17 = tail call i64 asm "", "={x17}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x17}"(i64 %x17) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_x1_clobbered: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: cbnz x8, LBB2_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB2_2: ; %exit ; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: add x16, sp, #8 ; CHECK-NEXT: movk x16, #49946, lsl #48 @@ -195,85 +141,52 @@ define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr sw ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh4: -; CHECK-NEXT: adrp x9, LJTI2_0@PAGE -; CHECK-NEXT: Lloh5: -; CHECK-NEXT: add x9, x9, LJTI2_0@PAGEOFF -; CHECK-NEXT: Ltmp2: -; CHECK-NEXT: adr x10, Ltmp2 -; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] -; CHECK-NEXT: add x10, x10, x11 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x10 -; CHECK-NEXT: LBB2_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB2_3 -; CHECK-NEXT: LBB2_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB2_3: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh4, Lloh5 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI2_0: -; CHECK-NEXT: .long LBB2_3-Ltmp2 -; CHECK-NEXT: .long LBB2_1-Ltmp2 -; CHECK-NEXT: .long LBB2_1-Ltmp2 -; CHECK-NEXT: .long LBB2_2-Ltmp2 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x1 = tail call i64 asm "", "={x1}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x1}"(i64 %x1) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: cbnz x8, LBB3_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB3_2: ; %exit ; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: add x16, sp, #8 ; CHECK-NEXT: movk x16, #49946, lsl #48 @@ -284,76 +197,35 @@ define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x10, LJTI3_0@PAGE -; CHECK-NEXT: Lloh7: -; CHECK-NEXT: add x10, x10, LJTI3_0@PAGEOFF -; CHECK-NEXT: Ltmp3: -; CHECK-NEXT: adr x11, Ltmp3 -; CHECK-NEXT: ldrsw x12, [x10, x8, lsl #2] -; CHECK-NEXT: add x11, x11, x12 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x11 -; CHECK-NEXT: LBB3_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB3_3 -; CHECK-NEXT: LBB3_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB3_3: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI3_0: -; CHECK-NEXT: .long LBB3_3-Ltmp3 -; CHECK-NEXT: .long LBB3_1-Ltmp3 -; CHECK-NEXT: .long LBB3_1-Ltmp3 -; CHECK-NEXT: .long LBB3_2-Ltmp3 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x1 = tail call i64 asm "", "={x1}"() %x9 = tail call i64 asm "", "={x9}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x1}"(i64 %x1) tail call void asm sideeffect "", "{x9}"(i64 %x9) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } @@ -361,23 +233,8 @@ exit: define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-NEXT: add x16, sp, #8 -; CHECK-NEXT: movk x16, #49946, lsl #48 -; CHECK-NEXT: mov x17, x22 -; CHECK-NEXT: pacdb x17, x16 -; CHECK-NEXT: str x17, [sp, #8] -; CHECK-NEXT: add x29, sp, #16 -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 -; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: ; InlineAsm End -; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start @@ -404,27 +261,27 @@ define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %sr ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End -; CHECK-NEXT: ldr x10, [x0] -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh8: -; CHECK-NEXT: adrp x17, LJTI4_0@PAGE -; CHECK-NEXT: Lloh9: -; CHECK-NEXT: add x17, x17, LJTI4_0@PAGEOFF -; CHECK-NEXT: Ltmp4: -; CHECK-NEXT: adr x0, Ltmp4 -; CHECK-NEXT: ldrsw x19, [x17, x10, lsl #2] -; CHECK-NEXT: add x0, x0, x19 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x0 -; CHECK-NEXT: LBB4_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB4_3 -; CHECK-NEXT: LBB4_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB4_3: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: ldr x10, [x22] +; CHECK-NEXT: cbnz x10, LBB4_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB4_2: ; %exit +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x16, sp, #8 +; CHECK-NEXT: movk x16, #49946, lsl #48 +; CHECK-NEXT: mov x17, x22 +; CHECK-NEXT: pacdb x17, x16 +; CHECK-NEXT: str x17, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start @@ -454,22 +311,12 @@ define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %sr ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI4_0: -; CHECK-NEXT: .long LBB4_3-Ltmp4 -; CHECK-NEXT: .long LBB4_1-Ltmp4 -; CHECK-NEXT: .long LBB4_1-Ltmp4 -; CHECK-NEXT: .long LBB4_2-Ltmp4 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x1 = tail call i64 asm "", "={x1}"() %x2 = tail call i64 asm "", "={x2}"() @@ -485,29 +332,16 @@ entry: %x13 = tail call i64 asm "", "={x13}"() %x14 = tail call i64 asm "", "={x14}"() %x15 = tail call i64 asm "", "={x15}"() - %x16 = tail call i64 asm "", "={x16}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x1}"(i64 %x1) tail call void asm sideeffect "", "{x2}"(i64 %x2) tail call void asm sideeffect "", "{x3}"(i64 %x3) @@ -522,37 +356,32 @@ exit: tail call void asm sideeffect "", "{x13}"(i64 %x13) tail call void asm sideeffect "", "{x14}"(i64 %x14) tail call void asm sideeffect "", "{x15}"(i64 %x15) - tail call void asm sideeffect "", "{x16}"(i64 %x16) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } + ; There is only 1 available scratch registers left, shrink-wrapping cannot ; happen because StoreSwiftAsyncContext needs 2 free scratch registers. define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src, ptr swiftasync %as) #0 { ; CHECK-LABEL: test_async_with_jumptable_1_available_reg_left: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: orr x29, x29, #0x1000000000000000 -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: stp x21, x19, [sp, #8] ; 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; CHECK-NEXT: add x16, sp, #24 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x16, sp, #8 ; CHECK-NEXT: movk x16, #49946, lsl #48 ; CHECK-NEXT: mov x17, x22 ; CHECK-NEXT: pacdb x17, x16 -; CHECK-NEXT: str x17, [sp, #24] -; CHECK-NEXT: add x29, sp, #32 +; CHECK-NEXT: str x17, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: .cfi_offset w19, -32 -; CHECK-NEXT: .cfi_offset w21, -40 -; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: ; InlineAsm End -; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start @@ -581,27 +410,15 @@ define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End -; CHECK-NEXT: ldr x10, [x0] -; CHECK-NEXT: mov x20, x22 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: Lloh10: -; CHECK-NEXT: adrp x0, LJTI5_0@PAGE -; CHECK-NEXT: Lloh11: -; CHECK-NEXT: add x0, x0, LJTI5_0@PAGEOFF -; CHECK-NEXT: Ltmp5: -; CHECK-NEXT: adr x21, Ltmp5 -; CHECK-NEXT: ldrsw x19, [x0, x10, lsl #2] -; CHECK-NEXT: add x21, x21, x19 -; CHECK-NEXT: mov x19, x20 -; CHECK-NEXT: br x21 -; CHECK-NEXT: LBB5_1: ; %then.2 -; CHECK-NEXT: mov x19, #0 ; =0x0 -; CHECK-NEXT: b LBB5_3 -; CHECK-NEXT: LBB5_2: ; %then.3 -; CHECK-NEXT: mov x19, x22 -; CHECK-NEXT: LBB5_3: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: ldr x10, [x22] +; CHECK-NEXT: cbnz x10, LBB5_2 +; CHECK-NEXT: ; %bb.1: ; %then.1 +; CHECK-NEXT: str xzr, [x22] +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: LBB5_2: ; %exit ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start @@ -633,23 +450,12 @@ define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: bl _foo -; CHECK-NEXT: mov x2, x0 -; CHECK-NEXT: mov x0, x19 -; CHECK-NEXT: mov x1, x20 -; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload -; CHECK-NEXT: ldp x21, x19, [sp, #8] ; 16-byte Folded Reload +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-NEXT: and x29, x29, #0xefffffffffffffff -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: br x2 -; CHECK-NEXT: .loh AdrpAdd Lloh10, Lloh11 -; CHECK-NEXT: .cfi_endproc -; CHECK-NEXT: .section __TEXT,__const -; CHECK-NEXT: .p2align 2, 0x0 -; CHECK-NEXT: LJTI5_0: -; CHECK-NEXT: .long LBB5_3-Ltmp5 -; CHECK-NEXT: .long LBB5_1-Ltmp5 -; CHECK-NEXT: .long LBB5_1-Ltmp5 -; CHECK-NEXT: .long LBB5_2-Ltmp5 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: br x1 entry: %x1 = tail call i64 asm "", "={x1}"() %x2 = tail call i64 asm "", "={x2}"() @@ -666,29 +472,16 @@ entry: %x14 = tail call i64 asm "", "={x14}"() %x15 = tail call i64 asm "", "={x15}"() %x16 = tail call i64 asm "", "={x16}"() - %x17 = tail call i64 asm "", "={x17}"() %l = load i64, ptr %src, align 8 - switch i64 %l, label %dead [ - i64 0, label %exit - i64 1, label %then.1 - i64 2, label %then.2 - i64 3, label %then.3 - ] + %c = icmp eq i64 %l, 0 + br i1 %c, label %then.1, label %exit then.1: + store i64 0, ptr %src br label %exit -then.2: - br label %exit - -then.3: - br label %exit - -dead: ; preds = %entryresume.5 - unreachable - exit: - %p = phi ptr [ %src, %then.3 ], [ null, %then.2 ], [ %as, %entry ], [ null, %then.1 ] + %p = phi ptr [ %src, %then.1 ], [ %as, %entry ] tail call void asm sideeffect "", "{x1}"(i64 %x1) tail call void asm sideeffect "", "{x2}"(i64 %x2) tail call void asm sideeffect "", "{x3}"(i64 %x3) @@ -704,13 +497,12 @@ exit: tail call void asm sideeffect "", "{x14}"(i64 %x14) tail call void asm sideeffect "", "{x15}"(i64 %x15) tail call void asm sideeffect "", "{x16}"(i64 %x16) - tail call void asm sideeffect "", "{x17}"(i64 %x17) - %r = call i64 @foo() + %r = call i64 @foo(ptr %p) %fn = inttoptr i64 %r to ptr - musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %p, ptr %as) + musttail call swifttailcc void %fn(ptr swiftasync %src, ptr %as) ret void } -declare i64 @foo() +declare i64 @foo(ptr) attributes #0 = { "frame-pointer"="non-leaf" } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll index ff2c5c4..67b4ebb 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll @@ -1,13 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-NOFP16 -; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-FP16 +; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define float @mul_HalfS(<2 x float> %bin.rdx) { -; CHECK-LABEL: mul_HalfS: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mul_HalfS: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mul_HalfS: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx) ret float %r } @@ -40,6 +49,27 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2] ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3] ; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: mul_HalfH: +; CHECK-GI-NOFP16: // %bb.0: +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] +; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: mul_HalfH: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx) ret half %r } @@ -93,17 +123,49 @@ define half @mul_H(<8 x half> %bin.rdx) { ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2] ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3] ; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: mul_H: +; CHECK-GI-NOFP16: // %bb.0: +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v1.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] +; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: mul_H: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: mov d1, v0.d[1] +; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx) ret half %r } define float @mul_S(<4 x float> %bin.rdx) { -; CHECK-LABEL: mul_S: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mul_S: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mul_S: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx) ret float %r } @@ -205,18 +267,56 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2] ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3] ; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: mul_2H: +; CHECK-GI-NOFP16: // %bb.0: +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] +; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: mul_2H: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h +; CHECK-GI-FP16-NEXT: mov d1, v0.d[1] +; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fmul h1, h2, h3 +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %r = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx) ret half %r } define float @mul_2S(<8 x float> %bin.rdx) { -; CHECK-LABEL: mul_2S: -; CHECK: // %bb.0: -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mul_2S: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mul_2S: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx) ret float %r } @@ -233,15 +333,26 @@ define double @mul_2D(<4 x double> %bin.rdx) { ; added at least one test where the start value is not 1.0. define float @mul_S_init_42(<4 x float> %bin.rdx) { -; CHECK-LABEL: mul_S_init_42: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov w8, #1109917696 // =0x42280000 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mul_S_init_42: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mov w8, #1109917696 // =0x42280000 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: fmul s0, s0, s1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mul_S_init_42: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000 +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx) ret float %r } @@ -335,6 +446,51 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2] ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3] ; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fmul_reduct_reassoc_v8f16: +; CHECK-GI-NOFP16: // %bb.0: +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov d2, v0.d[1] +; CHECK-GI-NOFP16-NEXT: mov d3, v1.d[1] +; CHECK-GI-NOFP16-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-GI-NOFP16-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NOFP16-NEXT: mov s2, v0.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[1] +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2 +; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s3 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fmul_reduct_reassoc_v8f16: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: mov d2, v0.d[1] +; CHECK-GI-FP16-NEXT: mov d3, v1.d[1] +; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v2.4h +; CHECK-GI-FP16-NEXT: fmul v1.4h, v1.4h, v3.4h +; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] +; CHECK-GI-FP16-NEXT: mov h6, v1.h[2] +; CHECK-GI-FP16-NEXT: mov h7, v1.h[3] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h2 +; CHECK-GI-FP16-NEXT: fmul h2, h3, h4 +; CHECK-GI-FP16-NEXT: fmul h1, h1, h5 +; CHECK-GI-FP16-NEXT: fmul h3, h6, h7 +; CHECK-GI-FP16-NEXT: fmul h0, h0, h2 +; CHECK-GI-FP16-NEXT: fmul h1, h1, h3 +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %r1 = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %a) %r2 = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %b) %r = fmul fast half %r1, %r2 @@ -342,15 +498,30 @@ define half @fmul_reduct_reassoc_v8f16(<8 x half> %a, <8 x half> %b) { } define float @fmul_reduct_reassoc_v8f32(<8 x float> %a, <8 x float> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmul v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v8f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmul v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v8f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: fmul v1.4s, v2.4s, v3.4s +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s2 +; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %b) %r = fmul fast float %r1, %r2 @@ -358,13 +529,26 @@ define float @fmul_reduct_reassoc_v8f32(<8 x float> %a, <8 x float> %b) { } define float @fmul_reduct_reassoc_v4f32(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s2 +; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b) %r = fmul fast float %r1, %r2 @@ -372,17 +556,31 @@ define float @fmul_reduct_reassoc_v4f32(<4 x float> %a, <4 x float> %b) { } define float @fmul_reduct_reassoc_v4f32_init(float %i, <4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v4f32_init: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: fmul s1, s1, v1.s[1] -; CHECK-NEXT: fmul v2.2s, v2.2s, v3.2s -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fmul s1, s2, v2.s[1] -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32_init: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-SD-NEXT: ext v3.16b, v2.16b, v2.16b, #8 +; CHECK-SD-NEXT: fmul s1, s1, v1.s[1] +; CHECK-SD-NEXT: fmul v2.2s, v2.2s, v3.2s +; CHECK-SD-NEXT: fmul s0, s0, s1 +; CHECK-SD-NEXT: fmul s1, s2, v2.s[1] +; CHECK-SD-NEXT: fmul s0, s0, s1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32_init: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mov d3, v2.d[1] +; CHECK-GI-NEXT: mov s4, v1.s[1] +; CHECK-GI-NEXT: fmul v2.2s, v2.2s, v3.2s +; CHECK-GI-NEXT: fmul s1, s1, s4 +; CHECK-GI-NEXT: mov s3, v2.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fmul s1, s2, s3 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %i, <4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b) %r = fmul fast float %r1, %r2 @@ -390,14 +588,28 @@ define float @fmul_reduct_reassoc_v4f32_init(float %i, <4 x float> %a, <4 x floa } define float @fmul_reduct_reassoc_v4v8f32(<4 x float> %a, <8 x float> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v4v8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v4v8f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmul v1.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v4v8f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmul v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s2 +; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %b) %r = fmul fast float %r1, %r2 @@ -405,13 +617,22 @@ define float @fmul_reduct_reassoc_v4v8f32(<4 x float> %a, <8 x float> %b) { } define double @fmul_reduct_reassoc_v4f64(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v4f64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d -; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d -; CHECK-NEXT: fmul d0, d0, v0.d[1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmul v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: fmul d0, d0, v0.d[1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmul v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: fmul v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT: fmul d0, d0, v0.d[1] +; CHECK-GI-NEXT: fmul d1, d1, v1.d[1] +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: ret %r1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a) %r2 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %b) %r = fmul fast double %r1, %r2 @@ -419,17 +640,31 @@ define double @fmul_reduct_reassoc_v4f64(<4 x double> %a, <4 x double> %b) { } define float @fmul_reduct_reassoc_v4f32_extrause(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: fmul_reduct_reassoc_v4f32_extrause: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v2.2s -; CHECK-NEXT: fmul v1.2s, v1.2s, v3.2s -; CHECK-NEXT: fmul s0, s0, v0.s[1] -; CHECK-NEXT: fmul s1, s1, v1.s[1] -; CHECK-NEXT: fmul s1, s0, s1 -; CHECK-NEXT: fmul s0, s1, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fmul_reduct_reassoc_v4f32_extrause: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-SD-NEXT: fmul s0, s0, v0.s[1] +; CHECK-SD-NEXT: fmul s1, s1, v1.s[1] +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: fmul s0, s1, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fmul_reduct_reassoc_v4f32_extrause: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s +; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: fmul s0, s0, s2 +; CHECK-GI-NEXT: fmul s1, s1, s3 +; CHECK-GI-NEXT: fmul s1, s0, s1 +; CHECK-GI-NEXT: fmul s0, s1, s0 +; CHECK-GI-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a) %r2 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %b) %r = fmul fast float %r1, %r2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 0df80d6..9580326 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -7,7 +7,7 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-LABEL: stack_write_fi: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_add_u32 s0, s0, s17 +; CHECK-NEXT: s_add_u32 s0, s0, s15 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_mov_b32 s4, 0 @@ -23,3 +23,6 @@ entry: store volatile i64 0, ptr addrspace(5) %alloca, align 4 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index c4e383c..44c4910 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -7,48 +7,47 @@ declare void @callee() define amdgpu_kernel void @call_debug_loc() { ; CHECK-LABEL: name: call_debug_loc ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !6 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !6 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6 - ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6 - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !6 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13, debug-location !7 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12, debug-location !7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9, debug-location !7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY14]], [[COPY1]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !7 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[COPY]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]], debug-location !7 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]], debug-location !7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]], debug-location !7 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]], debug-location !7 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]], debug-location !7 + ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]], debug-location !7 + ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]], debug-location !7 + ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]], debug-location !7 + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]], debug-location !7 + ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4) + ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7 ; CHECK-NEXT: S_ENDPGM 0 entry: call void @callee(), !dbg !6 @@ -60,11 +59,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !6 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !6 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7 ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: @@ -78,7 +77,7 @@ declare ptr @llvm.returnaddress(i32 immarg) #0 attributes #0 = { nofree nosync nounwind readnone willreturn } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.module.flags = !{!2, !3, !4, !5, !10} !0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 4132dc917eddb446405cc5afef41167b8bce360b)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "gisel_1_gfx1031.cl", directory: "/home/matt/builds/conformance/2.0") @@ -90,3 +89,4 @@ attributes #0 = { nofree nosync nounwind readnone willreturn } !7 = distinct !DISubprogram(name: "call_debug_loc", scope: !1, file: !1, line: 8, type: !8, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !9) !8 = !DISubroutineType(types: !9) !9 = !{} +!10 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index a2f1308..83c331c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -32,13 +32,13 @@ define void @call_result_align_1() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -81,13 +81,13 @@ define void @call_result_align_8() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -131,13 +131,13 @@ define void @declaration_result_align_8() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -181,11 +181,11 @@ define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -209,3 +209,6 @@ entry: %call = tail call ptr addrspace(1) @returns_ptr_align8() ret ptr addrspace(1) %call } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 9359bde..8069698 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -30,8 +30,8 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) { ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %15(s64), %bb.2, [[C1]](s64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %13(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) @@ -48,3 +48,6 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) { %oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 4cf02c93..ca889de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -9,37 +9,36 @@ declare hidden void @extern() define amdgpu_kernel void @kernel_call_no_workitem_ids() { ; CHECK-LABEL: name: kernel_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY9]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY10]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY11]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -50,40 +49,39 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() { define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; CHECK-LABEL: name: kernel_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -95,12 +93,12 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { define amdgpu_kernel void @kernel_call_no_other_sgprs() { ; CHECK-LABEL: name: kernel_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) @@ -139,12 +137,12 @@ define void @func_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -177,12 +175,12 @@ define void @func_call_no_workgroup_ids() { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]](p4) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] @@ -226,3 +224,6 @@ define void @func_call_no_other_sgprs() { call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index 711e1b7..213598b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -11,52 +11,51 @@ declare hidden void @external_void_func_v32i32(<32 x i32>) #0 define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -64,52 +63,51 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; ; GFX908-LABEL: name: test_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -130,13 +128,13 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -171,13 +169,13 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -209,41 +207,40 @@ define void @test_func_call_external_void_func_i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -281,16 +278,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc @@ -298,41 +295,40 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; ; GFX908-LABEL: name: test_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -370,16 +366,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc @@ -400,7 +396,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -458,7 +454,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -528,7 +524,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -586,7 +582,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -651,84 +647,82 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 { ; GFX900-LABEL: name: test_only_workitem_id_x ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; ; GFX908-LABEL: name: test_only_workitem_id_x ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -739,45 +733,44 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 { ; GFX900-LABEL: name: test_only_workitem_id_y ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -785,45 +778,44 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; ; GFX908-LABEL: name: test_only_workitem_id_y ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -835,45 +827,44 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 { ; GFX900-LABEL: name: test_only_workitem_id_z ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -881,45 +872,44 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; ; GFX908-LABEL: name: test_only_workitem_id_z ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -931,46 +921,45 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 { ; GFX900-LABEL: name: test_only_workitem_id_xy ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -978,46 +967,45 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; ; GFX908-LABEL: name: test_only_workitem_id_xy ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1029,50 +1017,49 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 { ; GFX900-LABEL: name: test_only_workitem_id_yz ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1080,50 +1067,49 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; ; GFX908-LABEL: name: test_only_workitem_id_yz ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1135,46 +1121,45 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 { ; GFX900-LABEL: name: test_only_workitem_id_xz ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]] ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1182,46 +1167,45 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; ; GFX908-LABEL: name: test_only_workitem_id_xz ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]] ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1237,9 +1221,11 @@ declare i32 @llvm.amdgcn.workitem.id.z() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable willreturn } +!llvm.module.flags = !{!6} !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} !3 = !{i32 32, i32 2, i32 1} !4 = !{i32 1, i32 32, i32 2} !5 = !{i32 32, i32 1, i32 2} +!6 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 609883c..8b0a006 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -70,58 +70,57 @@ declare hidden i32 @external_gfx_i32_func_i32(i32) #0 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GCN-LABEL: name: test_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, ptr addrspace(1) %out @@ -155,54 +154,53 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -233,54 +231,53 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) @@ -295,54 +292,53 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) @@ -357,54 +353,53 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -437,54 +432,53 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -499,54 +493,53 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) @@ -561,54 +554,53 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -620,54 +612,53 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) @@ -682,54 +673,53 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) @@ -744,55 +734,54 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() store volatile i32 %val, ptr addrspace(1) undef @@ -820,55 +809,54 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { define amdgpu_kernel void @test_call_external_i48_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) @@ -881,55 +869,54 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 { define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) @@ -944,55 +931,54 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) @@ -1007,55 +993,54 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN-LABEL: name: test_call_external_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1067,55 +1052,54 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN-LABEL: name: test_call_external_p1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1127,58 +1111,57 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2p1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1191,55 +1174,54 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN-LABEL: name: test_call_external_p3_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 %val = call ptr addrspace(3) @external_p3_func_void() store volatile ptr addrspace(3) %val, ptr addrspace(3) undef @@ -1249,55 +1231,54 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2p3_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 @@ -1309,54 +1290,53 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1368,55 +1348,54 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() store volatile float %val, ptr addrspace(1) undef @@ -1426,55 +1405,54 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN-LABEL: name: test_call_external_f64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1486,58 +1464,57 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2f64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) @@ -1550,55 +1527,54 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1610,56 +1586,55 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1671,57 +1646,56 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1733,58 +1707,57 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v5i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1796,61 +1769,60 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v8i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1862,69 +1834,68 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v16i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1936,85 +1907,84 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v32i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2026,55 +1996,54 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() store volatile <2 x i16> %val, ptr addrspace(1) undef @@ -2084,55 +2053,54 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2146,55 +2114,54 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2206,55 +2173,54 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() store volatile <2 x half> %val, ptr addrspace(1) undef @@ -2264,55 +2230,54 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2326,55 +2291,54 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2386,56 +2350,55 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2447,58 +2410,57 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v5f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2511,58 +2473,57 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } @external_i32_i64_func_void() @@ -2601,57 +2562,56 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_a2i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 @@ -2664,66 +2624,65 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN-LABEL: name: test_call_external_a5i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) ; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) ; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) @@ -2749,52 +2708,51 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v32i32_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2816,52 +2774,51 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_v32i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2883,52 +2840,51 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2943,18 +2899,17 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) @@ -2964,40 +2919,40 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) ; GCN-NEXT: $vgpr1 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3012,3 +2967,6 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index 820e41d..ed68d82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -6,18 +6,17 @@ declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(p define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 ; GCN: bb.1 (%ir-block.1): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -30,24 +29,24 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -55,16 +54,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc @@ -89,3 +88,6 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval store volatile i32 %out.val1, ptr addrspace(1) undef ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 2ede504..e29d178 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -97,49 +97,48 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, define amdgpu_kernel void @test_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -174,12 +173,12 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -208,51 +207,50 @@ define void @test_func_call_external_void_func_void() #0 { define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { ; CHECK-LABEL: name: test_call_external_void_func_empty_struct ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -264,51 +262,50 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { ; CHECK-LABEL: name: test_call_external_void_func_empty_array ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -320,52 +317,51 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -377,54 +373,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -437,54 +432,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -497,54 +491,53 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -556,55 +549,54 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8) ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -617,55 +609,54 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -678,52 +669,51 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -735,54 +725,53 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -795,54 +784,53 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -855,52 +843,51 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -950,53 +937,52 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1008,56 +994,55 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1070,57 +1055,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1132,56 +1116,55 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1194,56 +1177,55 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1256,56 +1238,55 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1318,54 +1299,53 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { ; CHECK-LABEL: name: test_call_external_void_func_p0_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1377,56 +1357,55 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2p0 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1439,18 +1418,17 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -1459,24 +1437,24 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1485,16 +1463,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1509,18 +1487,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 @@ -1529,24 +1506,24 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1557,16 +1534,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1580,52 +1557,51 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1637,51 +1613,50 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1693,55 +1668,54 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1753,57 +1727,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1815,18 +1788,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 @@ -1835,24 +1807,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1860,16 +1832,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1881,53 +1853,52 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1939,57 +1910,56 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2001,42 +1971,41 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -2045,16 +2014,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2066,52 +2035,51 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2124,57 +2092,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2187,57 +2154,56 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2250,54 +2216,53 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2310,18 +2275,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 @@ -2329,38 +2293,38 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2372,58 +2336,57 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2436,59 +2399,58 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v7i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2501,44 +2463,43 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v63i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2575,16 +2536,16 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc @@ -2597,44 +2558,43 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2674,16 +2634,16 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc @@ -2696,40 +2656,39 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v66i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -2770,16 +2729,16 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc @@ -2792,52 +2751,51 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2850,54 +2808,53 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2910,55 +2867,54 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2970,18 +2926,17 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -2989,39 +2944,39 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3033,18 +2988,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -3053,40 +3007,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3098,56 +3052,55 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3160,18 +3113,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3179,40 +3131,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3224,18 +3176,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3244,24 +3195,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3269,16 +3220,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3290,41 +3241,40 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3335,16 +3285,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3358,18 +3308,17 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3381,24 +3330,24 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C8]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C9]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C10]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3409,16 +3358,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3430,41 +3379,40 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3483,16 +3431,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -3506,41 +3454,40 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3578,16 +3525,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc @@ -3601,18 +3548,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -3621,24 +3567,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3679,16 +3625,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc @@ -3703,18 +3649,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) @@ -3723,24 +3668,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3751,10 +3696,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) @@ -3789,16 +3734,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc @@ -3815,18 +3760,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) @@ -3835,24 +3779,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3896,16 +3840,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF2]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF3]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc @@ -3921,18 +3865,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3941,39 +3884,39 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4039,18 +3982,17 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val @@ -4060,40 +4002,40 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc @@ -4121,7 +4063,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1 @@ -4129,7 +4071,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -4181,13 +4123,13 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -4221,41 +4163,40 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4264,16 +4205,16 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4287,41 +4228,40 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4333,16 +4273,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4356,41 +4296,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4405,16 +4344,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4428,41 +4367,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4489,16 +4427,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](s32) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4512,41 +4450,40 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4597,16 +4534,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -4620,18 +4557,17 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { ; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 @@ -4639,24 +4575,24 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -4701,16 +4637,16 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc @@ -4732,7 +4668,7 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) @@ -4765,7 +4701,7 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -4875,7 +4811,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) @@ -4908,7 +4844,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -5018,7 +4954,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) @@ -5047,7 +4983,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -5161,7 +5097,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) @@ -5190,7 +5126,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -5330,3 +5266,6 @@ main_body: attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 21e280e..ab40707 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -7,9 +7,9 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-LABEL: name: constant_fold_vector_add ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 @@ -22,3 +22,6 @@ entry: store <4 x i64> %add, ptr addrspace(1) null, align 32 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 4eba84f..0fb13bf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -4,50 +4,49 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -73,3 +72,6 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { call amdgpu_gfx void %fptr() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index bb37e54..ceff84e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -4,10 +4,10 @@ define amdgpu_kernel void @asm_convergent() convergent{ ; CHECK-LABEL: name: asm_convergent ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0 ret void @@ -16,11 +16,11 @@ define amdgpu_kernel void @asm_convergent() convergent{ define amdgpu_kernel void @asm_simple_memory_clobber() { ; CHECK-LABEL: name: asm_simple_memory_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "", "~{memory}"(), !srcloc !0 call void asm sideeffect "", ""(), !srcloc !0 @@ -30,10 +30,10 @@ define amdgpu_kernel void @asm_simple_memory_clobber() { define amdgpu_kernel void @asm_simple_vgpr_clobber() { ; CHECK-LABEL: name: asm_simple_vgpr_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0 ret void @@ -42,10 +42,10 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() { define amdgpu_kernel void @asm_simple_sgpr_clobber() { ; CHECK-LABEL: name: asm_simple_sgpr_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0 ret void @@ -54,10 +54,10 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() { define amdgpu_kernel void @asm_simple_agpr_clobber() { ; CHECK-LABEL: name: asm_simple_agpr_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0 ret void @@ -66,9 +66,9 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %7, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, !1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -94,8 +94,8 @@ entry: define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -106,8 +106,8 @@ entry: define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -119,9 +119,9 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228234 /* regdef:VGPR_32 */, def %9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7, 2228234 /* regdef:VGPR_32 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -136,9 +136,9 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 3538954 /* regdef:VReg_64 */, def %9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %7, 3538954 /* regdef:VReg_64 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %8 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -166,9 +166,9 @@ define float @test_vector_output() nounwind { define amdgpu_kernel void @test_input_vgpr_imm() { ; CHECK-LABEL: name: test_input_vgpr_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY1]] @@ -180,9 +180,9 @@ define amdgpu_kernel void @test_input_vgpr_imm() { define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK-LABEL: name: test_input_sgpr_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[COPY1]] @@ -194,9 +194,9 @@ define amdgpu_kernel void @test_input_sgpr_imm() { define amdgpu_kernel void @test_input_imm() { ; CHECK-LABEL: name: test_input_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 ; CHECK-NEXT: INLINEASM &"s_mov_b64 s[0:1], $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 ; CHECK-NEXT: S_ENDPGM 0 @@ -212,8 +212,8 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 2228233 /* reguse:VGPR_32 */, [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228233 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -227,8 +227,8 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 262158 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) @@ -244,8 +244,8 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11 + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 @@ -256,14 +256,14 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %10 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %9 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %12, 2359305 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %11, 2359305 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %11 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -285,10 +285,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %11, 2228234 /* regdef:VGPR_32 */, def %12, 2228234 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13 + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2228234 /* regdef:VGPR_32 */, def %11, 2228234 /* regdef:VGPR_32 */, def %12, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %10 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %11 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %12 ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -306,11 +306,11 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %7 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %9, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -322,13 +322,15 @@ entry: define amdgpu_kernel void @asm_constraint_n_n() { ; CHECK-LABEL: name: asm_constraint_n_n ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: INLINEASM &"s_trap ${0:n}", 1 /* sideeffect attdialect */, 13 /* imm */, 10 ; CHECK-NEXT: S_ENDPGM 0 tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1 ret void } +!llvm.module.flags = !{!1} !0 = !{i32 70} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index cc4796e..b45b307 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -816,7 +816,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -933,7 +933,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -984,7 +984,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1097,7 +1097,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1176,14 +1176,14 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1225,14 +1225,14 @@ define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspa ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1274,13 +1274,13 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1321,13 +1321,13 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1368,7 +1368,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1377,7 +1377,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1422,14 +1422,14 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1471,7 +1471,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1482,7 +1482,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -1514,3 +1514,6 @@ entry: attributes #0 = { nounwind } attributes #1 = { nounwind noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index edcb462..1ead1e4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -15,11 +15,11 @@ define void @tail_call_void_func_void() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @external_void_func_void ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -42,3 +42,6 @@ define void @tail_call_void_func_void() { tail call void @external_void_func_void() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 387c5c7..a0f54bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -9,7 +9,8 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_private_vgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: s_load_dword s2, s[4:5], 0x32 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -18,9 +19,7 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x11 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; CI-NEXT: flat_store_dword v[0:1], v0 ; CI-NEXT: s_endpgm @@ -79,9 +78,9 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { ; CI-LABEL: is_private_sgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x11 +; CI-NEXT: s_load_dword s0, s[4:5], 0x32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cbranch_scc1 .LBB1_2 @@ -150,3 +149,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 75f1fea..4b3a475 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -9,7 +9,8 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_local_vgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: s_load_dword s2, s[4:5], 0x33 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -18,9 +19,7 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x10 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; CI-NEXT: flat_store_dword v[0:1], v0 ; CI-NEXT: s_endpgm @@ -79,9 +78,9 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { ; CI-LABEL: is_local_sgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x10 +; CI-NEXT: s_load_dword s0, s[4:5], 0x33 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cbranch_scc1 .LBB1_2 @@ -150,3 +149,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index fec564f..e3779ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -75,8 +75,8 @@ bb.2: store volatile i32 0, ptr addrspace(1) undef ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 -; DEFAULTSIZE: ; ScratchSize: 4112 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16 +; DEFAULTSIZE: ; ScratchSize: 16 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 ; ASSUME1024: ; ScratchSize: 1040 @@ -137,8 +137,8 @@ bb.1: ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160 -; DEFAULTSIZE: ; ScratchSize: 4160 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64 +; DEFAULTSIZE: ; ScratchSize: 64 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1088 ; ASSUME1024: ; ScratchSize: 1088 @@ -265,3 +265,9 @@ bb.1: declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ASSUME1024: {{.*}} +; DEFAULTSIZE: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 0897993..b356b9a 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -11,10 +11,10 @@ declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 ;. -; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4 -; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 -; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4 -; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 +; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 +; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 +; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4 +; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 ;. define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 { ; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast @@ -225,12 +225,19 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} ;. ; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +;. +; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500} +;. +; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500} ;. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll index 313bd85..195c5da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll @@ -1,23 +1,27 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s -; GFX11-LABEL: {{^}}lds_direct_load: -; GFX11: s_mov_b32 m0 +; GCN-LABEL: {{^}}lds_direct_load: +; GCN: s_mov_b32 m0 ; GFX11: lds_direct_load v{{[0-9]+}} -; GFX11: s_mov_b32 m0 +; GFX12: ds_direct_load v{{[0-9]+}} +; GCN: s_mov_b32 m0 ; GFX11: lds_direct_load v{{[0-9]+}} -; GFX11: s_mov_b32 m0 +; GFX12: ds_direct_load v{{[0-9]+}} +; GCN: s_mov_b32 m0 ; GFX11: lds_direct_load v{{[0-9]+}} -; GFX11: s_waitcnt expcnt(2) -; GFX11: v_add_f32 -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(1) -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(0) -; GFX11: buffer_store_b32 -; GFX11: buffer_store_b32 -; GFX11: buffer_store_b32 -; GFX11: buffer_store_b32 +; GCN: s_waitcnt expcnt(2) +; GCN: v_add_f32 +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(1) +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(0) +; GCN: buffer_store_b32 +; GCN: buffer_store_b32 +; GCN: buffer_store_b32 +; GCN: buffer_store_b32 define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2) #0 { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll index 5a8b03e..1ab753d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll @@ -1,25 +1,32 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s -; GFX11-LABEL: {{^}}lds_param_load: -; GFX11: s_mov_b32 m0 +; GCN-LABEL: {{^}}lds_param_load: +; GCN: s_mov_b32 m0 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x -; GFX11: s_waitcnt expcnt(4) -; GFX11: v_add_f32 -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(3) -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(2) -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(1) -; GFX11: buffer_store_b32 -; GFX11: s_waitcnt expcnt(0) -; GFX11: buffer_store_b32 -; GFX11: buffer_store_b32 +; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x +; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y +; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z +; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w +; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x +; GCN: s_waitcnt expcnt(4) +; GCN: v_add_f32 +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(3) +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(2) +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(1) +; GCN: buffer_store_b32 +; GCN: s_waitcnt expcnt(0) +; GCN: buffer_store_b32 +; GCN: buffer_store_b32 define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 { main_body: %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll index c287789..dcf9b3f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll @@ -1,42 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12-SDAG %s +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12-GISEL %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s ; Scalar data prefetch define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x200, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], 0x200, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -46,18 +38,14 @@ entry: ; Check large offsets define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_max_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_max_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_max_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_max_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -65,18 +53,14 @@ entry: } define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_min_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_min_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -96,6 +80,9 @@ define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inre ; ; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset: ; GFX12-GISEL: ; %bb.0: ; %entry +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 +; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608 @@ -137,55 +124,43 @@ entry: ; Check supported address spaces define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_flat: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_flat: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_flat: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_flat: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_global: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_global: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_global: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_global: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_data_sgpr_constant_32bit: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_mov_b32 s1, 0 -; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_constant_32bit: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_constant_32bit: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_data_sgpr_constant_32bit: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1) ret void @@ -194,36 +169,28 @@ entry: ; I$ prefetch define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_inst_sgpr: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_inst_sgpr: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_inst_sgpr: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_inst_sgpr: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0) ret void } define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_inst_sgpr_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_inst_sgpr_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_inst_sgpr_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_inst_sgpr_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) @@ -233,18 +200,14 @@ entry: ; Check large offsets define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_inst_sgpr_max_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_inst_sgpr_max_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_inst_sgpr_max_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_inst_sgpr_max_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) @@ -252,18 +215,14 @@ entry: } define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0 -; GFX12-SDAG-NEXT: s_endpgm +; GFX12-LABEL: prefetch_inst_sgpr_min_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0 +; GFX12-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_inst_sgpr_min_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) @@ -283,6 +242,9 @@ define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inre ; ; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset: ; GFX12-GISEL: ; %bb.0: ; %entry +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 +; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 ; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608 diff --git a/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir b/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir deleted file mode 100644 index 8e4e3be..0000000 --- a/llvm/test/CodeGen/PowerPC/coalescer-remat-with-undef-implicit-def-operand.mir +++ /dev/null @@ -1,28 +0,0 @@ -# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-coalescing -run-pass=register-coalescer \ -# RUN: -o - %s | FileCheck %s ---- -name: _Z13testTransposeIfLj31ELj17EEvv -alignment: 16 -tracksRegLiveness: true -frameInfo: - maxAlignment: 128 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: _Z13testTransposeIfLj31ELj17EEvv - ; CHECK: undef %[[REG:[0-9]+]].sub_64:vsrc = IMPLICIT_DEF implicit-def %[[REG]] - bb.0: - liveins: $x2 - %2:vssrc = IMPLICIT_DEF - B %bb.2 - - bb.1: - %0:vsrc = SUBREG_TO_REG 1, killed %2, %subreg.sub_64 - %1:vsrc = XXPERMDI killed undef %0, killed %0, 0 - BLR8 implicit $lr8, implicit $rm - - bb.2: - successors: %bb.2(0x7c000000), %bb.1(0x04000000) - BDNZ8 %bb.2, implicit-def $ctr8, implicit $ctr8 - B %bb.1 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll b/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll new file mode 100644 index 0000000..6f3e9f7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck %s + +define noundef signext i32 @ham(ptr nocapture noundef %arg) #0 { +; CHECK-LABEL: ham: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: lwz 4, 0(3) +; CHECK-NEXT: cmpwi 4, 750 +; CHECK-NEXT: addi 5, 4, 1 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: bc 12, 0, L..BB0_1 +; CHECK-NEXT: b L..BB0_2 +; CHECK-NEXT: L..BB0_1: # %bb +; CHECK-NEXT: addi 4, 5, 0 +; CHECK-NEXT: L..BB0_2: # %bb +; CHECK-NEXT: stw 4, 0(3) +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr +bb: + %load = load i32, ptr %arg, align 4 + %icmp = icmp slt i32 %load, 750 + %add = add nsw i32 %load, 1 + %select = select i1 %icmp, i32 %add, i32 1 + store i32 %select, ptr %arg, align 4 + ret i32 0 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+power8-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-isa-v30-instructions,-isel,-power9-vector,-privileged,-rop-protect,-spe" } diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll index 3718ce8..7c98d4a 100644 --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -338,3 +338,28 @@ bb: } declare void @snork(ptr) + +define i8 @disjoint_or_lb(ptr %a, i32 %off) nounwind { +; RV32I-LABEL: disjoint_or_lb: +; RV32I: # %bb.0: +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: ret + %b = or disjoint i32 %off, 3 + %1 = getelementptr i8, ptr %a, i32 %b + %2 = load i8, ptr %1 + ret i8 %2 +} + +define i32 @disjoint_or_lw(ptr %a, i32 %off) nounwind { +; RV32I-LABEL: disjoint_or_lw: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: ret + %b = or disjoint i32 %off, 3 + %1 = getelementptr i32, ptr %a, i32 %b + %2 = load i32, ptr %1 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index fc94f8c..47d65c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1231,17 +1231,16 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_nxv1i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vmv.v.x v9, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v8, v10, 23 -; CHECK-F-NEXT: vwsubu.wv v9, v9, v8 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8 +; CHECK-F-NEXT: vsrl.vi v8, v9, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v9, a1 +; CHECK-F-NEXT: vzext.vf2 v9, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v9, a1 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; @@ -1372,17 +1371,16 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_nxv2i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vmv.v.x v10, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v8, v12, 23 -; CHECK-F-NEXT: vwsubu.wv v10, v10, v8 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v10, a1 +; CHECK-F-NEXT: vzext.vf2 v10, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v10, a1 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; @@ -1513,17 +1511,16 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_nxv4i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vmv.v.x v12, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v8, v16, 23 -; CHECK-F-NEXT: vwsubu.wv v12, v12, v8 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 +; CHECK-F-NEXT: vsrl.vi v8, v12, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v12, a1 +; CHECK-F-NEXT: vzext.vf2 v12, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v12, a1 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; @@ -1654,17 +1651,16 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_nxv8i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vmv.v.x v16, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8 -; CHECK-F-NEXT: vsrl.vi v8, v24, 23 -; CHECK-F-NEXT: vwsubu.wv v16, v16, v8 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 +; CHECK-F-NEXT: vsrl.vi v8, v16, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v16, a1 +; CHECK-F-NEXT: vzext.vf2 v16, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v16, a1 +; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; @@ -2837,16 +2833,15 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vmv.v.x v9, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v8, v10, 23 -; CHECK-F-NEXT: vwsubu.wv v9, v9, v8 +; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8 +; CHECK-F-NEXT: vsrl.vi v8, v9, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-F-NEXT: vzext.vf2 v9, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v9, a1 ; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv1r.v v8, v9 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64: @@ -2973,16 +2968,15 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vmv.v.x v10, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v8, v12, 23 -; CHECK-F-NEXT: vwsubu.wv v10, v10, v8 +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-F-NEXT: vzext.vf2 v10, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v10, a1 ; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv2r.v v8, v10 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64: @@ -3109,16 +3103,15 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vmv.v.x v12, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v8, v16, 23 -; CHECK-F-NEXT: vwsubu.wv v12, v12, v8 +; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 +; CHECK-F-NEXT: vsrl.vi v8, v12, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-F-NEXT: vzext.vf2 v12, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v12, a1 ; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv4r.v v8, v12 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64: @@ -3245,15 +3238,14 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) { ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64: ; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vmv8r.v v16, v8 -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16 -; CHECK-F-NEXT: vsrl.vi v16, v24, 23 -; CHECK-F-NEXT: vwsubu.wv v8, v8, v16 +; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 +; CHECK-F-NEXT: vsrl.vi v8, v16, 23 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-F-NEXT: vzext.vf2 v16, v8 +; CHECK-F-NEXT: li a1, 190 +; CHECK-F-NEXT: vrsub.vx v8, v16, a1 ; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index fe605d5..d99e3a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -10,12 +10,14 @@ ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING +; FIXME: We should use vwadd/vwsub/vwmul instructions. ; Check that the scalable vector add/sub/mul operations are all promoted into their ; vw counterpart when the folding of the web size is increased to 3. ; We need the web size to be at least 3 for the folding to happen, because ; %c has 3 uses. ; see https://github.com/llvm/llvm-project/pull/72340 +; FIXME: We don't currently use widening instructions. define <vscale x 2 x i16> @vwop_vscale_sext_multiple_users(ptr %x, ptr %y, ptr %z) { ; NO_FOLDING-LABEL: vwop_vscale_sext_multiple_users: ; NO_FOLDING: # %bb.0: @@ -35,16 +37,18 @@ define <vscale x 2 x i16> @vwop_vscale_sext_multiple_users(ptr %x, ptr %y, ptr % ; ; FOLDING-LABEL: vwop_vscale_sext_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; FOLDING-NEXT: vle8.v v8, (a0) ; FOLDING-NEXT: vle8.v v9, (a1) ; FOLDING-NEXT: vle8.v v10, (a2) -; FOLDING-NEXT: vwmul.vv v11, v8, v9 -; FOLDING-NEXT: vwadd.vv v9, v8, v10 -; FOLDING-NEXT: vwsub.vv v12, v8, v10 -; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; FOLDING-NEXT: vor.vv v8, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v12 +; FOLDING-NEXT: vsext.vf2 v11, v8 +; FOLDING-NEXT: vsext.vf2 v8, v9 +; FOLDING-NEXT: vsext.vf2 v9, v10 +; FOLDING-NEXT: vmul.vv v8, v11, v8 +; FOLDING-NEXT: vadd.vv v10, v11, v9 +; FOLDING-NEXT: vsub.vv v9, v11, v9 +; FOLDING-NEXT: vor.vv v8, v8, v10 +; FOLDING-NEXT: vor.vv v8, v8, v9 ; FOLDING-NEXT: ret %a = load <vscale x 2 x i8>, ptr %x %b = load <vscale x 2 x i8>, ptr %y @@ -81,16 +85,18 @@ define <vscale x 2 x i16> @vwop_vscale_zext_multiple_users(ptr %x, ptr %y, ptr % ; ; FOLDING-LABEL: vwop_vscale_zext_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; FOLDING-NEXT: vle8.v v8, (a0) ; FOLDING-NEXT: vle8.v v9, (a1) ; FOLDING-NEXT: vle8.v v10, (a2) -; FOLDING-NEXT: vwmulu.vv v11, v8, v9 -; FOLDING-NEXT: vwaddu.vv v9, v8, v10 -; FOLDING-NEXT: vwsubu.vv v12, v8, v10 -; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; FOLDING-NEXT: vor.vv v8, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v12 +; FOLDING-NEXT: vzext.vf2 v11, v8 +; FOLDING-NEXT: vzext.vf2 v8, v9 +; FOLDING-NEXT: vzext.vf2 v9, v10 +; FOLDING-NEXT: vmul.vv v8, v11, v8 +; FOLDING-NEXT: vadd.vv v10, v11, v9 +; FOLDING-NEXT: vsub.vv v9, v11, v9 +; FOLDING-NEXT: vor.vv v8, v8, v10 +; FOLDING-NEXT: vor.vv v8, v8, v9 ; FOLDING-NEXT: ret %a = load <vscale x 2 x i8>, ptr %x %b = load <vscale x 2 x i8>, ptr %y diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll index de215c8..4dbaae5 100644 --- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll @@ -1141,3 +1141,39 @@ define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) { %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0 ret <4 x i32> %t1 } + +; (x|c1)+c2 where (x|c1) is addlike +define i32 @add_const_disjoint_or_const(i32 %arg) { +; X86-LABEL: add_const_disjoint_or_const: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $10, %eax +; X86-NEXT: retl +; +; X64-LABEL: add_const_disjoint_or_const: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 10(%rdi), %eax +; X64-NEXT: retq + %t0 = or disjoint i32 %arg, 8 + %t1 = add i32 %t0, 2 + ret i32 %t1 +} + +; (x+c1)|c2 where the outer or is addlike +define i32 @disjoint_or_const_add_const(i32 %arg) { +; X86-LABEL: disjoint_or_const_add_const: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $10, %eax +; X86-NEXT: retl +; +; X64-LABEL: disjoint_or_const_add_const: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 10(%rdi), %eax +; X64-NEXT: retq + %t0 = add i32 %arg, 8 + %t1 = or disjoint i32 %t0, 2 + ret i32 %t1 +} diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll deleted file mode 100644 index a3c3fc7..0000000 --- a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll +++ /dev/null @@ -1,185 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s - -%struct.wibble = type { %struct.wombat } -%struct.wombat = type { %struct.ham, [3 x i8] } -%struct.ham = type { %struct.zot } -%struct.zot = type { %struct.blam } -%struct.blam = type { %struct.ham.0 } -%struct.ham.0 = type { %struct.bar } -%struct.bar = type { %struct.bar.1 } -%struct.bar.1 = type { %struct.baz, i8 } -%struct.baz = type { %struct.snork } -%struct.snork = type <{ %struct.spam, i8, [3 x i8] }> -%struct.spam = type { %struct.snork.2, %struct.snork.2 } -%struct.snork.2 = type { i32 } -%struct.snork.3 = type { %struct.baz, i8, [3 x i8] } - -define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: pushq %r12 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_offset %rbx, -56 -; CHECK-NEXT: .cfi_offset %r12, -48 -; CHECK-NEXT: .cfi_offset %r13, -40 -; CHECK-NEXT: .cfi_offset %r14, -32 -; CHECK-NEXT: .cfi_offset %r15, -24 -; CHECK-NEXT: movl %r8d, %r14d -; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movq %rsi, %r13 -; CHECK-NEXT: movq %rdi, %r15 -; CHECK-NEXT: incl %r14d -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: # implicit-def: $r12 -; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_1: # %bb17 -; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: movq %r15, %r13 -; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: testq %rbx, %rbx -; CHECK-NEXT: sete %r15b -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq _Znwm@PLT -; CHECK-NEXT: shll $4, %r15d -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; CHECK-NEXT: movq %r12, %rcx -; CHECK-NEXT: shrq $32, %rcx -; CHECK-NEXT: movb %cl, 12(%rax) -; CHECK-NEXT: movl %r12d, 8(%rax) -; CHECK-NEXT: movq %r15, %rbx -; CHECK-NEXT: movq %r13, %r15 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; CHECK-NEXT: decl %r14d -; CHECK-NEXT: je .LBB0_8 -; CHECK-NEXT: .LBB0_3: # %bb7 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: callq widget@PLT -; CHECK-NEXT: cmpb $-5, (%r13) -; CHECK-NEXT: jae .LBB0_5 -; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: movl %r12d, %r12d -; CHECK-NEXT: cmpq %r15, %rbx -; CHECK-NEXT: jbe .LBB0_1 -; CHECK-NEXT: jmp .LBB0_7 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_5: # %bb12 -; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: movq 0, %rax -; CHECK-NEXT: movq 8, %rax -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; CHECK-NEXT: cmpq %r15, %rbx -; CHECK-NEXT: jbe .LBB0_1 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: decl %r14d -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: .LBB0_8: # %bb21 -; CHECK-NEXT: cmpb $0, 12(%rax) -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.9: # %bb26 -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa %rsp, 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_10: # %bb25 -; CHECK-NEXT: .cfi_def_cfa %rbp, 16 -; CHECK-NEXT: movq %r15, %rdi -; CHECK-NEXT: callq pluto@PLT -bb: - br label %bb7 - -bb5: ; preds = %bb17, %bb14 - %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ] - %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ] - %add = add i32 %phi9, 1 - %icmp = icmp eq i32 %phi9, %arg4 - br i1 %icmp, label %bb21, label %bb7 - -bb7: ; preds = %bb5, %bb - %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ] - %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ] - %phi10 = phi i40 [ undef, %bb ], [ %phi15, %bb5 ] - %call = call ptr @widget() - %load = load i8, ptr %arg1, align 8 - %icmp11 = icmp ult i8 %load, -5 - %and = and i40 %phi10, 4294967295 - br i1 %icmp11, label %bb14, label %bb12 - -bb12: ; preds = %bb7 - %load13 = load volatile { i64, i64 }, ptr null, align 4294967296 - br label %bb14 - -bb14: ; preds = %bb12, %bb7 - %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ] - %icmp16 = icmp ugt ptr %phi8, %arg - br i1 %icmp16, label %bb5, label %bb17 - -bb17: ; preds = %bb14 - %icmp18 = icmp eq ptr %phi8, null - %zext = zext i1 %icmp18 to i64 - %call19 = call ptr @_Znwm(i64 0) - %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext - %getelementptr20 = getelementptr i8, ptr %call19, i64 8 - store i40 %phi15, ptr %getelementptr20, align 4 - br label %bb5 - -bb21: ; preds = %bb5 - %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1 - %load23 = load i8, ptr %getelementptr22, align 4 - %icmp24 = icmp eq i8 %load23, 0 - br i1 %icmp24, label %bb26, label %bb25 - -bb25: ; preds = %bb21 - call void @pluto(ptr %arg) - unreachable - -bb26: ; preds = %bb21 - ret void -} - -define void @eggs(ptr %arg, ptr %arg1) { -; CHECK-LABEL: eggs: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: xorl %r8d, %r8d -; CHECK-NEXT: callq foo@PLT -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -bb: - call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0) - ret void -} - -declare ptr @widget() - -declare void @pluto(ptr) - -declare ptr @_Znwm(i64) - -attributes #0 = { noinline "frame-pointer"="all" } diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir index 190b140..8241a17 100644 --- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir +++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir @@ -9,7 +9,7 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555) ; CHECK-NEXT: liveins: $edi ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -28,7 +28,7 @@ body: | ; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al ; CHECK-NEXT: RET 0, killed undef $al ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir deleted file mode 100644 index fe53aef..0000000 --- a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir +++ /dev/null @@ -1,47 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s - - -# FIXME: Need to handle subrange updates when coalescing with subreg_to_reg -# This will fail if x86 enables subregister liveness. ---- -name: requires_new_subrange_coalesce_subreg_to_reg -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; CHECK-NEXT: liveins: $eax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax - ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF - ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit - ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags - ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit - ; CHECK-NEXT: RET 0, implicit %c - bb.0: - liveins: $eax - %init_eax:gr32 = COPY $eax - %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit - %b:gr32 = IMPLICIT_DEF - %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit - JCC_1 %bb.2, 4, implicit undef $eflags - - bb.1: - %imm0:gr32 = MOV32r0 implicit-def dead $eflags - %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit - %c.sub_32bit = COPY %a - - bb.2: - %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit - RET 0, implicit %c - -... diff --git a/llvm/test/CodeGen/X86/or-lea.ll b/llvm/test/CodeGen/X86/or-lea.ll index ab9b917..616ab99 100644 --- a/llvm/test/CodeGen/X86/or-lea.ll +++ b/llvm/test/CodeGen/X86/or-lea.ll @@ -825,3 +825,23 @@ entry: %or = or i64 %sub, 549755813889 ; 0x8000000001 ret i64 %or } + +define i32 @or_shift1_disjoint(i32 %x, i32 %y) { +; X86-LABEL: or_shift1_disjoint: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl +; +; X64-LABEL: or_shift1_disjoint: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rsi,%rdi,2), %eax +; X64-NEXT: retq + %shl = shl i32 %x, 1 + %or = or disjoint i32 %y, %shl + ret i32 %or +} + diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir deleted file mode 100644 index 6121a0b..0000000 --- a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir +++ /dev/null @@ -1,348 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 -# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s - -# We cannot lose the liveness of the high subregister of %1 when -# coalesced with %0, so introduce an implicit-def of the super -# register on the MOV. - ---- -name: coalesce_mov32r0_into_subreg_to_reg64 -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64 - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1 - ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %0:gr32 = MOV32r0 implicit-def dead $eflags - %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: subreg_to_reg_folds_to_undef -tracksRegLiveness: true -body: | - bb.0: - liveins: $rax - - ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef - ; CHECK: liveins: $rax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax - ; CHECK-NEXT: undef %4.sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def %4 - ; CHECK-NEXT: RET 0, implicit %4 - %0:gr64 = COPY killed $rax - %1:gr32 = COPY killed %0.sub_32bit - %2:gr32 = MOV32rr killed %1 - %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit - %4:gr64 = COPY killed %3 - RET 0, implicit %4 - -... - ---- -name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags - ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags - %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1 - ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0 - %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef %1.sub_8bit, implicit-def %1 - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit %1 - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit - %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - INLINEASM &"", 0, implicit %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - - -# Reduced realistic case which was asserting after introducing new implicit-defs ---- -name: coalesce_needs_implicit_defs -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: coalesce_needs_implicit_defs - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $rdi - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK-NEXT: undef %2.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %2 - ; CHECK-NEXT: undef %3.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %10.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags - ; CHECK-NEXT: TEST64rr %3, %3, implicit-def $eflags - ; CHECK-NEXT: %10.sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags - ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi - ; CHECK-NEXT: CALL64r %2, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[SHL64ri]], 4, implicit-def dead $eflags - ; CHECK-NEXT: [[ADD64rr:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[ADD64rr]], [[COPY]], implicit-def dead $eflags - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[ADD64rr]] - ; CHECK-NEXT: JMP_1 %bb.1 - bb.0: - liveins: $rdi - - %0:gr64 = COPY killed $rdi - %1:gr32 = MOV32r0 implicit-def dead $eflags - %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit - %3:gr64 = COPY killed %2 - - bb.1: - %4:gr64 = COPY killed %3 - %5:gr32 = MOV32r0 implicit-def dead $eflags - TEST64rr killed %4, %4, implicit-def $eflags - %6:gr8 = SETCCr 4, implicit killed $eflags - %7:gr32 = COPY killed %5 - %7.sub_8bit:gr32 = COPY killed %6 - %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit - $rdi = COPY %9 - CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %10:gr64 = COPY killed %8 - %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags - %11:gr64 = COPY killed %10 - %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags - %3:gr64 = COPY killed %11 - JMP_1 %bb.1 - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi - ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %0:gr32 = MOV32r0 implicit-def dead $eflags - $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use -tracksRegLiveness: true -body: | - bb.0: - liveins: $eax - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use - ; CHECK: liveins: $eax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags - ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit - ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]] - ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - $eax = MOV32r0 implicit-def dead $eflags - %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - -# Coalesced instruction is a copy with other implicit operands ---- -name: coalesce_copy_into_subreg_to_reg64 -tracksRegLiveness: true -body: | - bb.0: - liveins: $eax - ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64 - ; CHECK: liveins: $eax - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def %1 - ; CHECK-NEXT: $rdi = COPY %1 - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %0:gr32 = COPY $eax, implicit-def dead $eflags - %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value - ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1 - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def %1.sub_32bit, implicit %1.sub_32bit - ; CHECK-NEXT: $rdi = COPY %1 - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %0:gr32 = MOV32r0 implicit-def dead $eflags - INLINEASM &"", 0, implicit-def %0, implicit %0 - %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - $rdi = COPY %1 - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 - ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags - ; CHECK-NEXT: JMP_1 %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: $rdi = COPY %1 - ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: RET 0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - bb.0: - INLINEASM &"", 0, implicit-def %0:gr32 - JCC_1 %bb.1, 4, implicit undef $eflags - JMP_1 %bb.2 - - bb.1: - %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit - $rdi = COPY %1 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - RET 0 - - bb.2: - -... - ---- -name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 - ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags - ; CHECK-NEXT: JMP_1 %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $rdi = COPY %1 - ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: JMP_1 %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - bb.0: - - INLINEASM &"", 0, implicit-def %0:gr32 - JCC_1 %bb.1, 4, implicit undef $eflags - JMP_1 %bb.2 - - bb.1: - %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit - $rdi = COPY %1 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - JMP_1 %bb.1 - - bb.2: - -... diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll index 22e353f..44f2d58 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll @@ -2258,7 +2258,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm3 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 ; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm3 ; AVX512F-SLOW-NEXT: vpmovqw %ymm3, %xmm3 ; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm10 @@ -2273,7 +2272,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7] ; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm11 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm10 = ymm11[0,1,2,3],ymm10[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm10[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm10[0,1,2,3],zmm2[0,1,2,3] ; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %xmm10 ; AVX512F-SLOW-NEXT: vmovdqa 80(%rdi), %xmm11 ; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm13 @@ -2292,7 +2291,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm1, %zmm9 ; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm9[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5 ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm8[0,1,1,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm12[0,1,1,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1] @@ -2307,7 +2305,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm0, %zmm9 ; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3],ymm8[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm8[0,1,2,3],zmm5[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm8[0,1,2,3],zmm5[0,1,2,3] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[3,1,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm6[0,1,2,0,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[3,1,2,3] @@ -2324,7 +2322,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm1, %zmm13 ; AVX512F-SLOW-NEXT: vpmovqw %zmm13, %xmm13 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3],ymm12[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm0, %zmm12 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm13 = xmm3[0,1,2,0,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3] @@ -2341,7 +2338,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm0, %zmm14 ; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm14[0,1,2,3],ymm13[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm12 = zmm13[0,1,2,3],zmm12[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm12 = zmm13[0,1,2,3],zmm12[0,1,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,3,1,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,3,1,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] @@ -2354,7 +2351,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm1, %zmm1 ; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm1 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm6[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,3,1,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] @@ -2367,7 +2363,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm0, %zmm0 ; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,2,3] ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, (%rsi) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm5, (%rdx) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm12, (%rcx) @@ -2393,7 +2389,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm7, %ymm11, %ymm10 ; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm7 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1,2,3],ymm10[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm7 ; AVX512F-FAST-NEXT: vmovdqa 96(%rdi), %ymm10 ; AVX512F-FAST-NEXT: vpermd %ymm10, %ymm4, %ymm12 ; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm12, %ymm13 @@ -2403,7 +2398,7 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm13, %ymm11, %ymm4 ; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm4 = ymm13[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm4[0,1,2,3],zmm7[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm4[0,1,2,3],zmm7[0,1,2,3] ; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31] ; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm5, %ymm13 ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> @@ -2412,14 +2407,13 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm1, %zmm13 ; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm13[0,1,2,3],ymm9[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9 ; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm12, %ymm12 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm15, %ymm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3,4,5],ymm12[6,7] ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm0, %zmm13 ; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm13[0,1,2,3],ymm12[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm12[0,1,2,3],zmm9[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm12[0,1,2,3],zmm9[0,1,2,3] ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm12 = [1,3,2,3,1,3,5,7] ; AVX512F-FAST-NEXT: vpermd %ymm6, %ymm12, %ymm6 ; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm6, %ymm13 @@ -2429,7 +2423,6 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm1, %zmm13 ; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3],ymm15[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm13, %zmm0, %zmm13 ; AVX512F-FAST-NEXT: vpermd %ymm10, %ymm12, %ymm10 ; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm10, %ymm2 ; AVX512F-FAST-NEXT: vpermd %ymm14, %ymm12, %ymm12 @@ -2438,21 +2431,20 @@ define void @load_i16_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm0, %zmm2 ; AVX512F-FAST-NEXT: vpmovqw %zmm2, %xmm2 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm13[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm13[0,1,2,3] ; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm6, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm8, %ymm6 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm1, %zmm1 ; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm1 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 ; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm10, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm12, %ymm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm0, %zmm0 ; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,2,3] ; AVX512F-FAST-NEXT: vmovdqa64 %zmm7, (%rsi) ; AVX512F-FAST-NEXT: vmovdqa64 %zmm9, (%rdx) ; AVX512F-FAST-NEXT: vmovdqa64 %zmm2, (%rcx) @@ -4909,285 +4901,276 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; ; AVX512F-SLOW-LABEL: load_i16_stride4_vf64: ; AVX512F-SLOW: # %bb.0: -; AVX512F-SLOW-NEXT: subq $104, %rsp -; AVX512F-SLOW-NEXT: vmovdqa 240(%rdi), %xmm0 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm0[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa 224(%rdi), %xmm2 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: subq $200, %rsp +; AVX512F-SLOW-NEXT: vmovdqa64 256(%rdi), %zmm26 +; AVX512F-SLOW-NEXT: vmovdqa64 384(%rdi), %zmm27 +; AVX512F-SLOW-NEXT: vmovdqa64 (%rdi), %zmm28 +; AVX512F-SLOW-NEXT: vmovdqa64 128(%rdi), %zmm29 +; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %ymm0 +; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-SLOW-NEXT: vmovdqa 240(%rdi), %xmm14 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm14[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm9[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa 224(%rdi), %xmm13 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm13[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[0,1,0,2,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 112(%rdi), %xmm20 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm14 = xmm20[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm14[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm3 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm3[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 496(%rdi), %xmm19 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm19[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 480(%rdi), %xmm21 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm21[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 368(%rdi), %xmm17 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm17[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 352(%rdi), %xmm24 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm24[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm1[0,1,0,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm18 = xmm9[0],xmm6[0],xmm9[1],xmm6[1] -; AVX512F-SLOW-NEXT: vmovdqa64 320(%rdi), %xmm25 -; AVX512F-SLOW-NEXT: vmovdqa64 336(%rdi), %xmm29 -; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %xmm11 -; AVX512F-SLOW-NEXT: vmovdqa64 464(%rdi), %xmm16 -; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %xmm9 -; AVX512F-SLOW-NEXT: vmovdqa 80(%rdi), %xmm12 -; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm13 -; AVX512F-SLOW-NEXT: vmovdqa 208(%rdi), %xmm6 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm10[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm10[0],xmm5[1],xmm10[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm6[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm13[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm10[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm10[0],xmm5[0],xmm10[1],xmm5[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm12[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm9[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm27 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm8[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] -; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm16[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; AVX512F-SLOW-NEXT: vpmovqw %zmm29, %xmm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm1 +; AVX512F-SLOW-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-SLOW-NEXT: vmovdqa 112(%rdi), %xmm12 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm12[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm11 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm11[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm7[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7] +; AVX512F-SLOW-NEXT: vpmovqw %zmm28, %xmm4 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %ymm0 +; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-SLOW-NEXT: vmovdqa64 496(%rdi), %xmm24 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm24[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm4[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 480(%rdi), %xmm23 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm23[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; AVX512F-SLOW-NEXT: vpmovqw %zmm27, %xmm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa 320(%rdi), %ymm1 +; AVX512F-SLOW-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-SLOW-NEXT: vmovdqa64 368(%rdi), %xmm31 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm31[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm5[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 352(%rdi), %xmm25 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm25[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm8[0,1,0,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm10[0],xmm0[0],xmm10[1],xmm0[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] +; AVX512F-SLOW-NEXT: vpmovqw %zmm26, %xmm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vmovdqa64 320(%rdi), %xmm30 +; AVX512F-SLOW-NEXT: vmovdqa64 336(%rdi), %xmm17 +; AVX512F-SLOW-NEXT: vmovdqa64 448(%rdi), %xmm18 +; AVX512F-SLOW-NEXT: vmovdqa64 464(%rdi), %xmm19 +; AVX512F-SLOW-NEXT: vmovdqa64 64(%rdi), %xmm20 +; AVX512F-SLOW-NEXT: vmovdqa64 80(%rdi), %xmm21 +; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %xmm0 +; AVX512F-SLOW-NEXT: vmovdqa 208(%rdi), %xmm1 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm9[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm9[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm9[0],xmm6[0],xmm9[1],xmm6[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm29, %zmm6 +; AVX512F-SLOW-NEXT: vpmovqw %zmm6, %xmm6 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm7[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm21[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm20[0,2,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm22 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm26 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm29[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm25[0,2,2,3] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm28, %zmm6 +; AVX512F-SLOW-NEXT: vpmovqw %zmm6, %xmm6 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3],ymm3[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, (%rsp) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm4[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm15[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm19[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm18[0,2,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm30 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm0[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm14 = xmm2[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm14[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm28 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm13[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm2[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm31 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm20[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm23 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm12[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm15[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm20 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm19[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm21[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm12[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm27, %zmm3 +; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,1,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm17[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm30[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm26, %zmm4 +; AVX512F-SLOW-NEXT: vpmovqw %zmm4, %xmm4 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm14[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm22 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm13[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm21 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm16[3,1,2,3] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[3,1,2,3] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm6[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,1,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm7[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm19 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm17[3,1,2,3] -; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm24[3,1,2,3] -; AVX512F-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm4[0,1,2,0,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm17 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm29[3,1,2,3] -; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm25[3,1,2,3] -; AVX512F-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm4[2,0,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm16 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] -; AVX512F-SLOW-NEXT: vmovdqa 192(%rdi), %ymm0 -; AVX512F-SLOW-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm0[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 128(%rdi), %zmm0 -; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm4 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4 -; AVX512F-SLOW-NEXT: vmovdqa 64(%rdi), %ymm5 -; AVX512F-SLOW-NEXT: vpmovqw %ymm5, %xmm5 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm8 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm8[6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 (%rdi), %zmm29 -; AVX512F-SLOW-NEXT: vpmovqw %zmm29, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm5[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa 448(%rdi), %ymm4 -; AVX512F-SLOW-NEXT: vpmovqw %ymm4, %xmm4 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-SLOW-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 384(%rdi), %zmm25 -; AVX512F-SLOW-NEXT: vpmovqw %zmm25, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa 320(%rdi), %ymm5 -; AVX512F-SLOW-NEXT: vpmovqw %ymm5, %xmm5 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm18, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm8[6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 256(%rdi), %zmm18 -; AVX512F-SLOW-NEXT: vpmovqw %zmm18, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4 -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm5[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm4 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm0, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm27, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm29, %zmm8 -; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4 -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm27 = zmm5[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm4 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm22, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm25, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm26, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm30, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $16, %zmm18, %zmm8 -; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4 -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm5[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm10[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm3[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm29, %zmm1 +; AVX512F-SLOW-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm12[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm3[0,1,2,0,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm28, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm31, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm0, %zmm8 -; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3],ymm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm2[0],xmm6[0],xmm2[1],xmm6[1] -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm23, %ymm0, %ymm2 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm20, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm8[0,1,2,3,4,5],ymm2[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm29, %zmm8 -; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm8[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm5[4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm21, %ymm0, %ymm3 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm19, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm5[0,1,2,3,4,5],ymm3[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm25, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm9[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm8[0],xmm5[0],xmm8[1],xmm5[1] -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm17, %ymm0, %ymm8 -; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm16, %ymm0, %ymm9 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3,4,5],ymm8[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm18, %zmm9 -; AVX512F-SLOW-NEXT: vpmovqw %zmm9, %xmm9 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm8 = ymm9[0,1,2,3],ymm8[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm8[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm12[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm13[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1] ; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm6[0,1,2,3,4,5],ymm4[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm0, %zmm0 -; AVX512F-SLOW-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm11[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3,4,5],ymm1[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm29, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm1 = mem[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm5 = mem[0,1,3,1,4,5,6,7] -; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1] -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm25, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm4 -; AVX512F-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm5 = mem[3,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm6 = mem[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm21[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm0[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm0, %xmm21 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm20[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm0[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm0, %xmm16 +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm8 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm8[0,1,2,3,4,5],ymm4[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm28, %zmm8 +; AVX512F-SLOW-NEXT: vpmovqw %zmm8, %xmm8 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm4 = ymm8[0,1,2,3],ymm4[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm4[0,1,2,3],zmm1[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm24[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm23[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm20 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm19[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm12[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm18[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm1[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm18 = xmm8[0],xmm4[0],xmm8[1],xmm4[1] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm31[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm4[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm25[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm9 = xmm15[0,1,2,0,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm19 = xmm9[0],xmm8[0],xmm9[1],xmm8[1] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm17[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm8[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm30[3,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm14 = xmm9[2,0,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm14[0],xmm0[0],xmm14[1],xmm0[1] +; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm20, %ymm0, %ymm14 +; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm18, %ymm0, %ymm5 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm14[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm27, %zmm14 +; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm5 = ymm14[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-SLOW-NEXT: vinserti32x4 $1, %xmm19, %ymm0, %ymm14 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm14[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $32, %zmm26, %zmm14 +; AVX512F-SLOW-NEXT: vpmovqw %zmm14, %xmm14 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm14[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm17 = zmm0[0,1,2,3],zmm5[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm22, %xmm0 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm13[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm6[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm7[3,1,2,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm29, %zmm3 +; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm21, %xmm3 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm16, %xmm5 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm28, %zmm3 +; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[0,1,2,3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm10[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm11[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm12[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,3,1,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3,4,5],ymm1[6,7] -; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm18, %zmm5 -; AVX512F-SLOW-NEXT: vpmovqw %zmm5, %xmm5 -; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm24, 64(%rsi) -; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 # 64-byte Reload -; AVX512F-SLOW-NEXT: vmovaps %zmm4, (%rsi) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm22, 64(%rdx) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm27, (%rdx) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm3, 64(%rcx) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, (%rcx) +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm27, %zmm2 +; AVX512F-SLOW-NEXT: vpmovqw %zmm2, %xmm2 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm8[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[3,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7] +; AVX512F-SLOW-NEXT: vpsrlq $48, %zmm26, %zmm3 +; AVX512F-SLOW-NEXT: vpmovqw %zmm3, %xmm3 +; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[0,1,2,3] +; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vmovaps %zmm2, 64(%rsi) +; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rsi) +; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vmovaps %zmm2, 64(%rdx) +; AVX512F-SLOW-NEXT: vmovups (%rsp), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rdx) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, 64(%rcx) +; AVX512F-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vmovaps %zmm2, (%rcx) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm1, 64(%r8) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm0, (%r8) -; AVX512F-SLOW-NEXT: addq $104, %rsp +; AVX512F-SLOW-NEXT: addq $200, %rsp ; AVX512F-SLOW-NEXT: vzeroupper ; AVX512F-SLOW-NEXT: retq ; @@ -5211,7 +5194,6 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm0, %ymm7, %ymm3 ; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm0 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-FAST-NEXT: vmovdqa64 96(%rdi), %ymm27 ; AVX512F-FAST-NEXT: vpermd %ymm27, %ymm1, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm3, %ymm9 @@ -5221,7 +5203,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm9, %ymm7, %ymm12 ; AVX512F-FAST-NEXT: vpmovqw %zmm30, %xmm9 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm12[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm9[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm9[0,1,2,3],zmm0[0,1,2,3] ; AVX512F-FAST-NEXT: vmovdqa64 480(%rdi), %ymm16 ; AVX512F-FAST-NEXT: vpermd %ymm16, %ymm1, %ymm0 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm9 @@ -5231,7 +5213,6 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm9, %ymm7, %ymm13 ; AVX512F-FAST-NEXT: vpmovqw %zmm26, %xmm9 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm13[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9 ; AVX512F-FAST-NEXT: vmovdqa64 352(%rdi), %ymm18 ; AVX512F-FAST-NEXT: vpermd %ymm18, %ymm1, %ymm13 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm13, %ymm14 @@ -5241,7 +5222,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm14, %ymm7, %ymm15 ; AVX512F-FAST-NEXT: vpmovqw %zmm23, %xmm14 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm14 = ymm14[0,1,2,3],ymm15[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm14[0,1,2,3],zmm9[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm22 = zmm14[0,1,2,3],zmm9[0,1,2,3] ; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm9 = [18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31,18,19,22,23,26,27,30,31] ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm10, %ymm14 ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm10 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> @@ -5250,28 +5231,26 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm4, %zmm14 ; AVX512F-FAST-NEXT: vpmovqw %zmm14, %xmm14 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm14[0,1,2,3],ymm11[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm11, %zmm0, %zmm11 ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm3, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm8, %ymm8 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm8[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm30, %zmm8 ; AVX512F-FAST-NEXT: vpmovqw %zmm8, %xmm8 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm8[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm29 = zmm3[0,1,2,3],zmm11[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm29 = zmm3[0,1,2,3],zmm11[0,1,2,3] ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm0, %ymm0 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm12, %ymm3 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7] ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm26, %zmm3 ; AVX512F-FAST-NEXT: vpmovqw %zmm3, %xmm3 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm13, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm1, %ymm1 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $16, %zmm23, %zmm3 ; AVX512F-FAST-NEXT: vpmovqw %zmm3, %xmm3 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm19 = zmm1[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm19 = zmm1[0,1,2,3],zmm0[0,1,2,3] ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm15 = [1,3,2,3,1,3,5,7] ; AVX512F-FAST-NEXT: vpermd %ymm24, %ymm15, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm3, %ymm0 @@ -5280,8 +5259,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm0, %ymm7, %ymm1 ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm4, %zmm0 ; AVX512F-FAST-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 +; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX512F-FAST-NEXT: vpermd %ymm27, %ymm15, %ymm0 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm13 ; AVX512F-FAST-NEXT: vpermd %ymm28, %ymm15, %ymm12 @@ -5290,7 +5268,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm30, %zmm13 ; AVX512F-FAST-NEXT: vpmovqw %zmm13, %xmm13 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3],ymm14[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm13[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm13[0,1,2,3],zmm1[0,1,2,3] ; AVX512F-FAST-NEXT: vpermd %ymm16, %ymm15, %ymm13 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm13, %ymm1 ; AVX512F-FAST-NEXT: vpermd %ymm17, %ymm15, %ymm14 @@ -5298,8 +5276,7 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpermt2d %ymm1, %ymm7, %ymm11 ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm26, %zmm1 ; AVX512F-FAST-NEXT: vpmovqw %zmm1, %xmm1 -; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm11[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm11 +; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm11[4,5,6,7] ; AVX512F-FAST-NEXT: vpermd %ymm18, %ymm15, %ymm1 ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm1, %ymm2 ; AVX512F-FAST-NEXT: vpermd %ymm20, %ymm15, %ymm5 @@ -5308,35 +5285,33 @@ define void @load_i16_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt ; AVX512F-FAST-NEXT: vpsrlq $32, %zmm23, %zmm2 ; AVX512F-FAST-NEXT: vpmovqw %zmm2, %xmm2 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm6[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm11[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm11[0,1,2,3] ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm3, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm8, %ymm6 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm4, %zmm4 ; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm0, %ymm0 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm12, %ymm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3,4,5],ymm0[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm30, %zmm4 ; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm3[0,1,2,3] ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm13, %ymm3 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm14, %ymm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3,4,5],ymm3[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm26, %zmm4 ; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 ; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm1, %ymm1 ; AVX512F-FAST-NEXT: vpshufb %ymm10, %ymm5, %ymm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3,4,5],ymm1[6,7] ; AVX512F-FAST-NEXT: vpsrlq $48, %zmm23, %zmm4 ; AVX512F-FAST-NEXT: vpmovqw %zmm4, %xmm4 ; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm3[0,1,2,3] ; AVX512F-FAST-NEXT: vmovdqa64 %zmm22, 64(%rsi) ; AVX512F-FAST-NEXT: vmovdqa64 %zmm21, (%rsi) ; AVX512F-FAST-NEXT: vmovdqa64 %zmm19, 64(%rdx) diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll index e2195f1..e231124 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll @@ -1943,8 +1943,7 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,0,4,0,4,8,12] ; AVX512F-NEXT: vpermt2d %ymm5, %ymm1, %ymm6 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm5 -; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm6[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm8 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm8 = ymm5[0,1,2,3],ymm6[4,5,6,7] ; AVX512F-NEXT: vmovdqa 96(%rdi), %ymm5 ; AVX512F-NEXT: vpshufb %ymm7, %ymm5, %ymm9 ; AVX512F-NEXT: vmovdqa 64(%rdi), %ymm6 @@ -1952,7 +1951,7 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512F-NEXT: vpermt2d %ymm9, %ymm1, %ymm7 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm9 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm7 = ymm9[0,1,2,3],ymm7[4,5,6,7] -; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm7[0,1,2,3],zmm8[4,5,6,7] +; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm7[0,1,2,3],zmm8[0,1,2,3] ; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm8 = [1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13] ; AVX512F-NEXT: vpshufb %ymm8, %ymm3, %ymm9 ; AVX512F-NEXT: vpshufb %ymm8, %ymm4, %ymm10 @@ -1960,14 +1959,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512F-NEXT: vpsrld $8, %zmm2, %zmm9 ; AVX512F-NEXT: vpmovdb %zmm9, %xmm9 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm10[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm9 ; AVX512F-NEXT: vpshufb %ymm8, %ymm5, %ymm10 ; AVX512F-NEXT: vpshufb %ymm8, %ymm6, %ymm8 ; AVX512F-NEXT: vpermt2d %ymm10, %ymm1, %ymm8 ; AVX512F-NEXT: vpsrld $8, %zmm0, %zmm10 ; AVX512F-NEXT: vpmovdb %zmm10, %xmm10 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm8 = ymm10[0,1,2,3],ymm8[4,5,6,7] -; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm9[4,5,6,7] +; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm9[0,1,2,3] ; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm9 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] ; AVX512F-NEXT: vpshufb %ymm9, %ymm3, %ymm10 ; AVX512F-NEXT: vpshufb %ymm9, %ymm4, %ymm11 @@ -1975,14 +1973,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512F-NEXT: vpsrld $16, %zmm2, %zmm10 ; AVX512F-NEXT: vpmovdb %zmm10, %xmm10 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3],ymm11[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm10 ; AVX512F-NEXT: vpshufb %ymm9, %ymm5, %ymm11 ; AVX512F-NEXT: vpshufb %ymm9, %ymm6, %ymm9 ; AVX512F-NEXT: vpermt2d %ymm11, %ymm1, %ymm9 ; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm11 ; AVX512F-NEXT: vpmovdb %zmm11, %xmm11 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm9 = ymm11[0,1,2,3],ymm9[4,5,6,7] -; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm9[0,1,2,3],zmm10[4,5,6,7] +; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm9 = zmm9[0,1,2,3],zmm10[0,1,2,3] ; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm10 = [3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15] ; AVX512F-NEXT: vpshufb %ymm10, %ymm3, %ymm3 ; AVX512F-NEXT: vpshufb %ymm10, %ymm4, %ymm4 @@ -1990,14 +1987,13 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512F-NEXT: vpsrld $24, %zmm2, %zmm2 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 ; AVX512F-NEXT: vpshufb %ymm10, %ymm5, %ymm3 ; AVX512F-NEXT: vpshufb %ymm10, %ymm6, %ymm4 ; AVX512F-NEXT: vpermt2d %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vpsrld $24, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm4[4,5,6,7] -; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[0,1,2,3] ; AVX512F-NEXT: vmovdqa64 %zmm7, (%rsi) ; AVX512F-NEXT: vmovdqa64 %zmm8, (%rdx) ; AVX512F-NEXT: vmovdqa64 %zmm9, (%rcx) diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll index d253dd1..f3ec442 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll @@ -5100,7 +5100,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm6[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm11 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm6[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[3,3,3,3,7,7,7,7] @@ -5139,8 +5139,8 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqa64 %xmm20, %xmm0 ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm9, %xmm0 ; AVX512F-SLOW-NEXT: vmovdqa 32(%rdx), %xmm15 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm15[1,1,2,2] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm11[0],xmm0[1],xmm11[2,3],xmm0[4],xmm11[5,6],xmm0[7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm15[1,1,2,2] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm10[0],xmm0[1],xmm10[2,3],xmm0[4],xmm10[5,6],xmm0[7] ; AVX512F-SLOW-NEXT: vmovdqa64 %ymm0, %ymm26 ; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm15[0],xmm9[0],xmm15[1],xmm9[1],xmm15[2],xmm9[2],xmm15[3],xmm9[3] ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm9 = xmm15[4],xmm9[4],xmm15[5],xmm9[5],xmm15[6],xmm9[6],xmm15[7],xmm9[7] @@ -5165,13 +5165,13 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqa 32(%rax), %ymm6 ; AVX512F-SLOW-NEXT: vpermd %zmm6, %zmm3, %zmm3 ; AVX512F-SLOW-NEXT: vmovdqa64 %ymm28, %ymm7 -; AVX512F-SLOW-NEXT: vpshufb %ymm7, %ymm6, %ymm11 +; AVX512F-SLOW-NEXT: vpshufb %ymm7, %ymm6, %ymm10 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm6[0,1,1,3,4,5,5,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,2,2,3] ; AVX512F-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm11, %zmm6 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm10, %zmm6 ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} xmm7 = [0,1,2,3,4,5,4,5,6,7,10,11,8,9,10,11] -; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm2, %xmm11 +; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm2, %xmm10 ; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm1, %xmm1 ; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm13 ; AVX512F-SLOW-NEXT: vmovdqa (%rsi), %xmm14 @@ -5195,10 +5195,10 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm15[2,1,2,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm15 = xmm15[0,1,2,3,4,5,5,4] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,1,3] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,0,1,1] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,0,1,1] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm28 = ymm30[2,2,2,3] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm29 = ymm23[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,2,2,3] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,2,2,3] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm19[2,1,3,3] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm30 = ymm18[0,2,2,3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7] @@ -5217,21 +5217,21 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm12[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm5 = ymm5[0],ymm12[1],ymm5[2,3],ymm12[4],ymm5[5,6,7,8],ymm12[9],ymm5[10,11],ymm12[12],ymm5[13,14,15] ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm28, %zmm12 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm10, %zmm7 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm12, %zmm10, %zmm7 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm11, %zmm7 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm11 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm12, %zmm11, %zmm7 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm30, %zmm0 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm31, %zmm1 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm10, %zmm1 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm11, %zmm1 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm9, %zmm0 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm15, %zmm9 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm10, %zmm9 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm11 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm11, %zmm9 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm11[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm11 = mem[2,1,3,2] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm10[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm10 = mem[2,1,3,2] ; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm12 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: # ymm12 = mem[2,2,2,3] ; AVX512F-SLOW-NEXT: vpshufd $254, {{[-0-9]+}}(%r{{[sb]}}p), %ymm15 # 32-byte Folded Reload @@ -5258,7 +5258,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload ; AVX512F-SLOW-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm16, %zmm0 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm9 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm10, %zmm9 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm7, %zmm9 ; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm3 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm15[2,1,3,2] @@ -5269,7 +5269,7 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm7 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm8, %zmm17, %zmm1 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm18, %zmm8 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm10, %zmm8 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm11, %zmm8 ; AVX512F-SLOW-NEXT: vpbroadcastd 36(%rax), %ymm1 ; AVX512F-SLOW-NEXT: vpbroadcastd 40(%rax), %ymm9 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm1, %zmm1 @@ -5281,9 +5281,8 @@ define void @store_i16_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm28, %zmm9, %zmm9 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm9 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm0, %zmm8 -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm10[0,1,2,3],zmm8[4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm8 = zmm8[0,1,2,3],zmm29[0,1,2,3] ; AVX512F-SLOW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm6 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm6 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm31, %zmm30, %zmm8 @@ -10685,14 +10684,14 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; ; AVX512F-SLOW-LABEL: store_i16_stride7_vf64: ; AVX512F-SLOW: # %bb.0: -; AVX512F-SLOW-NEXT: subq $2200, %rsp # imm = 0x898 +; AVX512F-SLOW-NEXT: subq $2168, %rsp # imm = 0x878 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %ymm2 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rdx), %ymm9 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %ymm7 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %ymm8 ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm0 = [128,128,128,128,128,128,128,128,14,15,128,128,128,128,128,128,128,128,128,128,128,128,16,17,128,128,128,128,128,128,128,128] ; AVX512F-SLOW-NEXT: vpshufb %ymm0, %ymm2, %ymm1 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm19 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm18 ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,12,13,14,15,128,128,u,u,u,u,u,u,u,u,u,u,16,17,128,128,u,u,u,u,u,u,u,u> ; AVX512F-SLOW-NEXT: vpshufb %ymm11, %ymm9, %ymm2 ; AVX512F-SLOW-NEXT: vporq %ymm1, %ymm2, %ymm16 @@ -10705,16 +10704,16 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqa 64(%r9), %ymm2 ; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm1 ; AVX512F-SLOW-NEXT: vmovdqa %ymm3, %ymm10 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm20 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm21 ; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %ymm3 ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm15 = <u,u,u,u,u,u,u,u,12,13,14,15,128,128,u,u,u,u,u,u,u,u,u,u,16,17,128,128,u,u,u,u> ; AVX512F-SLOW-NEXT: vpshufb %ymm15, %ymm3, %ymm2 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm3, %ymm24 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm3, %ymm20 ; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1 ; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa 64(%rcx), %ymm2 ; AVX512F-SLOW-NEXT: vpshufb %ymm0, %ymm2, %ymm1 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm18 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm23 ; AVX512F-SLOW-NEXT: vmovdqa 64(%rdx), %ymm6 ; AVX512F-SLOW-NEXT: vpshufb %ymm11, %ymm6, %ymm2 ; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1 @@ -10744,10 +10743,10 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa (%rsi), %ymm2 ; AVX512F-SLOW-NEXT: vpshufb %ymm12, %ymm2, %ymm1 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm23 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm2, %ymm28 ; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %ymm10 ; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm10, %ymm2 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm10, %ymm25 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm10, %ymm22 ; AVX512F-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1 ; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa 32(%rcx), %ymm2 @@ -10780,7 +10779,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [22,23,26,27,0,0,24,25,26,27,0,0,26,27,26,27,22,23,26,27,0,0,24,25,26,27,0,0,26,27,26,27] ; AVX512F-SLOW-NEXT: # ymm12 = mem[0,1,0,1] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm19, %ymm14 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm14 ; AVX512F-SLOW-NEXT: vpshufb %ymm12, %ymm14, %ymm10 ; AVX512F-SLOW-NEXT: vmovdqa64 %ymm12, %ymm19 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm9[2,2,2,2,6,6,6,6] @@ -10814,54 +10813,53 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm16, %zmm8 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm17, %zmm7 ; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm7 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %ymm12 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm26 = [65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %ymm10 +; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm8 = ymm10[u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm10[14,15,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm10[16,17,u,u,u,u],zero,zero +; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm26, %ymm7, %ymm8 +; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %ymm12 ; AVX512F-SLOW-NEXT: vmovdqu %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm8 = ymm12[u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm12[14,15,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero,ymm12[16,17,u,u,u,u],zero,zero -; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm17, %ymm7, %ymm8 -; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %ymm14 -; AVX512F-SLOW-NEXT: vmovdqu %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm14, %ymm9 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm10, %ymm8, %ymm9 +; AVX512F-SLOW-NEXT: vpshufb %ymm3, %ymm12, %ymm9 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $248, %ymm14, %ymm8, %ymm9 ; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm7, %ymm7 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm8 = ymm12[0,0,2,1,4,4,6,5] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm8 = ymm10[0,0,2,1,4,4,6,5] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,3,3] ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0] ; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm7, %ymm10, %ymm8 -; AVX512F-SLOW-NEXT: vprold $16, %ymm14, %ymm7 +; AVX512F-SLOW-NEXT: vprold $16, %ymm12, %ymm7 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,2,2,2] -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm8, %ymm10, %ymm7 -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm10, %zmm16 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm8, %ymm14, %ymm7 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm7 ; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm7 = zmm9[0,1,2,3],zmm7[4,5,6,7] ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm8 = [6,5,0,0,7,6,0,7,6,5,0,0,7,6,0,7] -; AVX512F-SLOW-NEXT: # zmm8 = mem[0,1,2,3,0,1,2,3] +; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm18 = [6,5,0,0,7,6,0,7,6,5,0,0,7,6,0,7] +; AVX512F-SLOW-NEXT: # zmm18 = mem[0,1,2,3,0,1,2,3] ; AVX512F-SLOW-NEXT: vmovdqa 96(%rax), %ymm7 -; AVX512F-SLOW-NEXT: vpermd %zmm7, %zmm8, %zmm9 +; AVX512F-SLOW-NEXT: vpermd %zmm7, %zmm18, %zmm9 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm9 = ymm7[0,1,1,3,4,5,5,7] ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm10 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,14,15,128,128,128,128,128,128,128,128,128,128,128,128,16,17,128,128] ; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm7, %ymm7 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,2,2,3] -; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm17, %ymm9 +; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm26, %ymm9 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm7, %zmm7 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm14 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535] +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm16 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535] ; AVX512F-SLOW-NEXT: vpbroadcastd 72(%rax), %ymm7 -; AVX512F-SLOW-NEXT: vpandn %ymm7, %ymm14, %ymm9 +; AVX512F-SLOW-NEXT: vpandnq %ymm7, %ymm16, %ymm9 ; AVX512F-SLOW-NEXT: vmovdqa 64(%rax), %ymm7 ; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm7, %ymm12 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm9, %zmm9 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpbroadcastd 8(%rax), %ymm9 -; AVX512F-SLOW-NEXT: vpandn %ymm9, %ymm14, %ymm9 -; AVX512F-SLOW-NEXT: vmovdqa (%rax), %ymm14 -; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm14, %ymm12 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm14, %ymm22 +; AVX512F-SLOW-NEXT: vpandnq %ymm9, %ymm16, %ymm9 +; AVX512F-SLOW-NEXT: vmovdqa (%rax), %ymm8 +; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm8, %ymm12 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm8, %ymm24 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm9, %zmm9 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm9 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] @@ -10878,35 +10876,35 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufb %ymm10, %ymm9, %ymm10 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm9[0,1,1,3,4,5,5,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm12 = ymm12[0,2,2,3] -; AVX512F-SLOW-NEXT: vpandnq %ymm12, %ymm17, %ymm12 +; AVX512F-SLOW-NEXT: vpandnq %ymm12, %ymm26, %ymm12 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm10, %zmm10 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm10 = ymm13[0,0,2,1,4,4,6,5] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm12 = ymm15[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm12[0,0,0,0,4,4,4,4] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm12[0,1,2],ymm10[3],ymm12[4,5],ymm10[6],ymm12[7,8,9,10],ymm10[11],ymm12[12,13],ymm10[14],ymm12[15] -; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm8 = ymm12[0,1,2],ymm10[3],ymm12[4,5],ymm10[6],ymm12[7,8,9,10],ymm10[11],ymm12[12,13],ymm10[14],ymm12[15] +; AVX512F-SLOW-NEXT: vmovdqu %ymm8, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa64 %ymm19, %ymm13 ; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm2, %ymm10 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm12 = ymm1[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm10 = ymm10[0,1],ymm12[2],ymm10[3,4],ymm12[5],ymm10[6,7,8,9],ymm12[10],ymm10[11,12],ymm12[13],ymm10[14,15] -; AVX512F-SLOW-NEXT: vmovdqu %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm8 = ymm10[0,1],ymm12[2],ymm10[3,4],ymm12[5],ymm10[6,7,8,9],ymm12[10],ymm10[11,12],ymm12[13],ymm10[14,15] +; AVX512F-SLOW-NEXT: vmovdqu %ymm8, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,3,3,3,7,7,7,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6,7,8],ymm2[9],ymm1[10,11],ymm2[12],ymm1[13,14,15] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm31 +; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm0[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm11[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3],ymm1[4],ymm2[5,6,7,8],ymm1[9],ymm2[10,11],ymm1[12],ymm2[13,14,15] -; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm31 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm11[3,3,3,3,7,7,7,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6],ymm1[7,8,9,10],ymm0[11],ymm1[12,13],ymm0[14],ymm1[15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpermd %zmm9, %zmm8, %zmm0 +; AVX512F-SLOW-NEXT: vpermd %zmm9, %zmm18, %zmm0 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm5[1,2,3,3,4,5,6,7,9,10,11,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,1,4,4,6,5] @@ -10918,31 +10916,31 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm4[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3],ymm0[4],ymm1[5,6,7,8],ymm0[9],ymm1[10,11],ymm0[12],ymm1[13,14,15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm2 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm2 ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm6[0,1,1,3,4,5,5,7] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15] -; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm0, %ymm30 ; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm2, %ymm0 -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm18, %ymm3 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm3 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm6[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7,8,9],ymm1[10],ymm0[11,12],ymm1[13],ymm0[14,15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm20, %ymm8 -; AVX512F-SLOW-NEXT: vprold $16, %ymm20, %ymm0 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm24[1,2,2,3,5,6,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm21, %ymm8 +; AVX512F-SLOW-NEXT: vprold $16, %ymm21, %ymm0 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm20[1,2,2,3,5,6,6,7] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm8[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm24[0,0,2,1,4,4,6,5] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm20[0,0,2,1,4,4,6,5] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5],ymm2[6],ymm1[7,8,9,10],ymm2[11],ymm1[12,13],ymm2[14],ymm1[15] ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [2,2,3,3,10,9,11,10] ; AVX512F-SLOW-NEXT: vpermt2q %zmm0, %zmm10, %zmm1 ; AVX512F-SLOW-NEXT: vbroadcasti64x4 {{.*#+}} zmm11 = [0,5,4,0,0,6,5,0,0,5,4,0,0,6,5,0] ; AVX512F-SLOW-NEXT: # zmm11 = mem[0,1,2,3,0,1,2,3] ; AVX512F-SLOW-NEXT: vpermd 64(%rax), %zmm11, %zmm0 -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm17, %zmm0 +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm26, %zmm0 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm6[3,3,3,3,7,7,7,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm3[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] @@ -10954,61 +10952,61 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5],ymm1[6],ymm0[7,8,9,10],ymm1[11],ymm0[12,13],ymm1[14],ymm0[15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm24[3,3,3,3,7,7,7,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm20[3,3,3,3,7,7,7,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm8[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,3,6,6,6,7] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7,8,9],ymm0[10],ymm1[11,12],ymm0[13],ymm1[14,15] ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm12 = [4,5,4,5,4,5,6,7,16,17,16,17,16,17,17,19] ; AVX512F-SLOW-NEXT: vmovdqa 96(%r9), %xmm0 ; AVX512F-SLOW-NEXT: vmovdqa 96(%r8), %xmm1 -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[0,1,3,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm12, %zmm2 +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,3,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm12, %zmm3 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm7[2,3,3,3,6,7,7,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[2,1,3,2] ; AVX512F-SLOW-NEXT: vpbroadcastd 96(%rax), %ymm5 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4 ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm5, %zmm4 +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm3, %zmm5, %zmm4 ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm5, %zmm15 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %xmm2 +; AVX512F-SLOW-NEXT: vmovdqa 96(%rsi), %xmm3 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rdi), %xmm4 -; AVX512F-SLOW-NEXT: vprold $16, %xmm2, %xmm5 +; AVX512F-SLOW-NEXT: vprold $16, %xmm3, %xmm5 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,3] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2],xmm6[3,4],xmm5[5],xmm6[6,7] ; AVX512F-SLOW-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm18 -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %xmm2 +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm23 +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; AVX512F-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512F-SLOW-NEXT: vmovdqa 96(%rcx), %xmm3 ; AVX512F-SLOW-NEXT: vmovdqa 96(%rdx), %xmm4 ; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm6 = [6,7,4,5,0,0,8,9,6,7,4,5,0,0,8,9] -; AVX512F-SLOW-NEXT: vpshufb %xmm6, %xmm2, %xmm5 +; AVX512F-SLOW-NEXT: vpshufb %xmm6, %xmm3, %xmm5 ; AVX512F-SLOW-NEXT: vmovdqa %xmm6, %xmm7 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,2] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6],xmm5[7] ; AVX512F-SLOW-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; AVX512F-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; AVX512F-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,5,7,6] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,6] ; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,4,5,6,7,10,11,8,9,10,11] ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm1 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm28 = <16,18,19,19,19,19,u,u,0,1,0,1,2,3,2,3> -; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm28, %zmm1 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm29 = <16,18,19,19,19,19,u,u,0,1,0,1,2,3,2,3> +; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm29, %zmm1 ; AVX512F-SLOW-NEXT: vpbroadcastd 100(%rax), %ymm2 ; AVX512F-SLOW-NEXT: vpbroadcastd 104(%rax), %ymm3 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm16, %zmm2 +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm14, %zmm2 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa 64(%rcx), %xmm1 ; AVX512F-SLOW-NEXT: vmovdqa 64(%rdx), %xmm2 ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm3, %xmm30 +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm3, %xmm27 ; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; AVX512F-SLOW-NEXT: vpshufb %xmm7, %xmm1, %xmm1 ; AVX512F-SLOW-NEXT: vmovdqa %xmm7, %xmm8 @@ -11023,37 +11021,37 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm5, %xmm5 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,0,1,1] ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm6, %xmm26 +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm6, %xmm25 ; AVX512F-SLOW-NEXT: vprold $16, %xmm4, %xmm4 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4],xmm4[5],xmm2[6,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm4 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0] -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm4 -; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0] +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa 64(%r9), %xmm1 -; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %xmm4 -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm16 -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,5,7,6] +; AVX512F-SLOW-NEXT: vmovdqa 64(%r8), %xmm2 +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm5, %xmm17 +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5,7,6] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7] ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm6 = <0,1,0,1,0,1,1,3,16,18,19,19,19,19,u,u> -; AVX512F-SLOW-NEXT: vpermt2d %zmm4, %zmm6, %zmm1 -; AVX512F-SLOW-NEXT: vpbroadcastd 64(%rax), %ymm4 +; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm6, %zmm1 +; AVX512F-SLOW-NEXT: vpbroadcastd 64(%rax), %ymm2 ; AVX512F-SLOW-NEXT: vpbroadcastd 68(%rax), %ymm5 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm2, %zmm2 ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm9 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm4 -; AVX512F-SLOW-NEXT: vmovdqu64 %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm2 +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa (%rcx), %xmm1 ; AVX512F-SLOW-NEXT: vmovdqa (%rdx), %xmm5 -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm4, %xmm19 +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm20 ; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3] ; AVX512F-SLOW-NEXT: vpshufb %xmm8, %xmm1, %xmm1 -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm8, %xmm29 +; AVX512F-SLOW-NEXT: vmovdqa %xmm8, %xmm14 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[1,1,2,2] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0],xmm1[1],xmm5[2,3],xmm1[4],xmm5[5,6],xmm1[7] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm7[0,1,3,2,4,5,6,7] @@ -11063,150 +11061,153 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm7, %xmm7 ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,0,1,1] -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm14 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm2, %xmm19 ; AVX512F-SLOW-NEXT: vprold $16, %xmm5, %xmm5 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm5[2],xmm3[3,4],xmm5[5],xmm3[6,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,0,2,1] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm7, %zmm3 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm3 -; AVX512F-SLOW-NEXT: vmovdqu64 %zmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm7, %zmm2 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2 +; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vmovdqa (%r9), %xmm1 -; AVX512F-SLOW-NEXT: vmovdqa (%r8), %xmm2 -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5,7,6] +; AVX512F-SLOW-NEXT: vmovdqa (%r8), %xmm3 +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,5,7,6] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpermt2d %zmm2, %zmm6, %zmm1 -; AVX512F-SLOW-NEXT: vpbroadcastd (%rax), %ymm2 -; AVX512F-SLOW-NEXT: vpbroadcastd 4(%rax), %ymm3 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-SLOW-NEXT: vpermt2d %zmm3, %zmm6, %zmm1 +; AVX512F-SLOW-NEXT: vpbroadcastd (%rax), %ymm3 +; AVX512F-SLOW-NEXT: vpbroadcastd 4(%rax), %ymm4 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm2 ; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm9, %zmm2 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm23, %ymm7 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm28, %ymm7 ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm7[1,2,3,3,4,5,6,7,9,10,11,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,1,4,4,6,5] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm25[1,1,1,1,5,5,5,5] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7,8,9],ymm2[10],ymm1[11,12],ymm2[13],ymm1[14,15] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm20 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm22, %ymm2 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm22[1,1,1,1,5,5,5,5] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm4[2],ymm1[3,4],ymm4[5],ymm1[6,7,8,9],ymm4[10],ymm1[11,12],ymm4[13],ymm1[14,15] +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm22 ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm7[0,1,2,3,5,4,6,7,8,9,10,11,13,12,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm25[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3],ymm1[4],ymm2[5,6,7,8],ymm1[9],ymm2[10,11],ymm1[12],ymm2[13,14,15] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm27 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm2[2,2,2,2,6,6,6,6] +; AVX512F-SLOW-NEXT: vmovdqa %ymm2, %ymm9 +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0],ymm1[1],ymm4[2,3],ymm1[4],ymm4[5,6,7,8],ymm1[9],ymm4[10,11],ymm1[12],ymm4[13,14,15] +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm21 +; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX512F-SLOW-NEXT: vprold $16, %ymm2, %ymm1 ; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm3 # 32-byte Reload -; AVX512F-SLOW-NEXT: vprold $16, %ymm3, %ymm1 -; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm5[1,2,2,3,5,6,6,7] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7,8,9],ymm1[10],ymm2[11,12],ymm1[13],ymm2[14,15] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm3[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,0,0,4,4,4,4] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm5[0,0,2,1,4,4,6,5] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm6[3],ymm2[4,5],ymm6[6],ymm2[7,8,9,10],ymm6[11],ymm2[12,13],ymm6[14],ymm2[15] -; AVX512F-SLOW-NEXT: vpermt2q %zmm1, %zmm10, %zmm2 +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm3[1,2,2,3,5,6,6,7] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0,1],ymm1[2],ymm4[3,4],ymm1[5],ymm4[6,7,8,9],ymm1[10],ymm4[11,12],ymm1[13],ymm4[14,15] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm4 = ymm2[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[0,0,0,0,4,4,4,4] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm3[0,0,2,1,4,4,6,5] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm4 = ymm4[0,1,2],ymm6[3],ymm4[4,5],ymm6[6],ymm4[7,8,9,10],ymm6[11],ymm4[12,13],ymm6[14],ymm4[15] +; AVX512F-SLOW-NEXT: vpermt2q %zmm1, %zmm10, %zmm4 ; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm8 # 32-byte Reload ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm8[1,2,2,3,4,5,6,7,9,10,10,11,12,13,14,15] ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] ; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Reload ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm10[0,1,1,3,4,5,5,7] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm6[0,1],ymm1[2],ymm6[3,4],ymm1[5],ymm6[6,7,8,9],ymm1[10],ymm6[11,12],ymm1[13],ymm6[14,15] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm24 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm16 ; AVX512F-SLOW-NEXT: vpshufb %ymm13, %ymm8, %ymm1 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm6 = ymm10[2,2,2,2,6,6,6,6] ; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm6[2],ymm1[3,4],ymm6[5],ymm1[6,7,8,9],ymm6[10],ymm1[11,12],ymm6[13],ymm1[14,15] -; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm21 +; AVX512F-SLOW-NEXT: vmovdqa64 %ymm1, %ymm18 ; AVX512F-SLOW-NEXT: vpermd (%rax), %zmm11, %zmm1 -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm17, %zmm1 +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm4, %zmm26, %zmm1 ; AVX512F-SLOW-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm10[3,3,3,3,7,7,7,7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm8[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6,7,8],ymm2[9],ymm1[10,11],ymm2[12],ymm1[13,14,15] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm8[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,2,6,6,6,6] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2,3],ymm4[4],ymm1[5,6,7,8],ymm4[9],ymm1[10,11],ymm4[12],ymm1[13,14,15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm25[3,3,3,3,7,7,7,7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm7[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,2,6,6,6,6] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5],ymm2[6],ymm1[7,8,9,10],ymm2[11],ymm1[12,13],ymm2[14],ymm1[15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm9[3,3,3,3,7,7,7,7] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm7[0,1,2,3,7,6,6,7,8,9,10,11,15,14,14,15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,2,6,6,6,6] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm4[3],ymm1[4,5],ymm4[6],ymm1[7,8,9,10],ymm4[11],ymm1[12,13],ymm4[14],ymm1[15] ; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm5[3,3,3,3,7,7,7,7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm3[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,2,2,3,6,6,6,7] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7,8,9],ymm1[10],ymm2[11,12],ymm1[13],ymm2[14,15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[3,3,3,3,7,7,7,7] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm4 = ymm2[0,1,2,3,5,6,7,7,8,9,10,11,13,14,15,15] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[2,2,2,3,6,6,6,7] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm4[0,1],ymm1[2],ymm4[3,4],ymm1[5],ymm4[6,7,8,9],ymm1[10],ymm4[11,12],ymm1[13],ymm4[14,15] ; AVX512F-SLOW-NEXT: vmovdqa 32(%r9), %xmm7 -; AVX512F-SLOW-NEXT: vmovdqa 32(%r8), %xmm2 -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3] +; AVX512F-SLOW-NEXT: vmovdqa 32(%r8), %xmm4 +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] ; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm11 = xmm6[0,1,3,2,4,5,6,7] ; AVX512F-SLOW-NEXT: vpermt2d %zmm11, %zmm12, %zmm1 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm11 = ymm22[2,3,3,3,6,7,7,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm11 = ymm24[2,3,3,3,6,7,7,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,1,3,2] ; AVX512F-SLOW-NEXT: vpbroadcastd 32(%rax), %ymm12 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm25 -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm15, %zmm25 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm26 +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm1, %zmm15, %zmm26 ; AVX512F-SLOW-NEXT: vmovdqa 32(%rdi), %xmm1 ; AVX512F-SLOW-NEXT: vmovdqa 32(%rsi), %xmm12 ; AVX512F-SLOW-NEXT: vprold $16, %xmm12, %xmm15 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm1[1,1,2,3] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0,1],xmm15[2],xmm13[3,4],xmm15[5],xmm13[6,7] -; AVX512F-SLOW-NEXT: vmovdqu %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm13[0,1],xmm15[2],xmm13[3,4],xmm15[5],xmm13[6,7] +; AVX512F-SLOW-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm1[0],xmm12[0],xmm1[1],xmm12[1],xmm1[2],xmm12[2],xmm1[3],xmm12[3] ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7] ; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm16, %xmm1 +; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm4[4],xmm7[4],xmm4[5],xmm7[5],xmm4[6],xmm7[6],xmm4[7],xmm7[7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm17, %xmm1 +; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm2 +; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm5, %xmm3 +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm23, %xmm1 ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm1, %xmm1 -; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm4, %xmm3 -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm18, %xmm4 +; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm13, %xmm1 +; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm4, %xmm4 -; AVX512F-SLOW-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm13, %xmm4 -; AVX512F-SLOW-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm2, %xmm2 -; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm23 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm23 = mem[2,2,2,3] -; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm22 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm22 = mem[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm18 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm18 = mem[0,2,2,3] +; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm24 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm24 = mem[2,2,2,3] +; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm23 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm23 = mem[0,2,2,3] +; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm7 = mem[0,2,2,3] ; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm17 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: # ymm17 = mem[2,1,3,3] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm6[0,1,2,3,4,5,7,6] -; AVX512F-SLOW-NEXT: vpermt2d %zmm0, %zmm28, %zmm2 +; AVX512F-SLOW-NEXT: vpermt2d %zmm0, %zmm29, %zmm4 ; AVX512F-SLOW-NEXT: vpbroadcastd 36(%rax), %ymm0 ; AVX512F-SLOW-NEXT: vpbroadcastd 40(%rax), %ymm6 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm28 -; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm28 +; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm28 ; AVX512F-SLOW-NEXT: vmovdqa 32(%rcx), %xmm6 -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm29, %xmm0 -; AVX512F-SLOW-NEXT: vpshufb %xmm0, %xmm6, %xmm2 +; AVX512F-SLOW-NEXT: vpshufb %xmm14, %xmm6, %xmm4 ; AVX512F-SLOW-NEXT: vmovdqa 32(%rdx), %xmm0 -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[1,1,2,2] -; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0],xmm2[1],xmm7[2,3],xmm2[4],xmm7[5,6],xmm2[7] -; AVX512F-SLOW-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] -; AVX512F-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm0[1,1,2,2] +; AVX512F-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm12[0],xmm4[1],xmm12[2,3],xmm4[4],xmm12[5,6],xmm4[7] +; AVX512F-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] +; AVX512F-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512F-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] ; AVX512F-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Reload -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm4[0,1,2,3,5,5,7,6,8,9,10,11,13,13,15,14] +; AVX512F-SLOW-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm1[0,1,2,3,5,5,7,6,8,9,10,11,13,13,15,14] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm29 = ymm0[3,3,3,3] -; AVX512F-SLOW-NEXT: vpshufd $233, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm2 = mem[1,2,2,3,5,6,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm10 = ymm4[2,1,2,3,6,5,6,7] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm10 = ymm10[0,0,3,3,4,5,6,7,8,8,11,11,12,13,14,15] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm10[2,2,2,2] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,1] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm30, %xmm4 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm4[0,2,3,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd $233, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm0 = mem[1,2,2,3,5,6,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm14 = ymm1[2,1,2,3,6,5,6,7] +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} ymm14 = ymm14[0,0,3,3,4,5,6,7,8,8,11,11,12,13,14,15] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm14 = ymm14[2,2,2,2] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,1,1] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm27, %xmm1 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm10 = xmm1[0,2,3,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,0,2,1] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm26, %xmm4 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm4[2,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm25, %xmm1 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm8 = xmm1[2,1,2,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm8 = xmm8[0,1,2,3,4,5,5,4] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm8 = ymm8[0,0,1,3] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,0,1,1] -; AVX512F-SLOW-NEXT: vmovdqa64 %xmm19, %xmm4 -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm4[0,2,3,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm20, %xmm1 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm1[0,2,3,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,2,1] -; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm14[2,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqa64 %xmm19, %xmm1 +; AVX512F-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm1[2,1,2,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,4,5,5,4] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm12 = ymm12[0,0,1,3] ; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload @@ -11215,176 +11216,174 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; AVX512F-SLOW-NEXT: # ymm5 = mem[2,2,2,3] ; AVX512F-SLOW-NEXT: vpermq $250, {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: # ymm6 = mem[2,2,3,3] -; AVX512F-SLOW-NEXT: vpermpd $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm7 = mem[2,2,2,3] -; AVX512F-SLOW-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpermpd $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm1 = mem[2,2,2,3] +; AVX512F-SLOW-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-SLOW-NEXT: vpermpd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm1 = mem[0,2,2,3] +; AVX512F-SLOW-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm31 = ymm31[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm30 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm30 = mem[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm26 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm26 = mem[2,1,3,3] -; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm19 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm19 = mem[2,2,2,3] +; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm27 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm27 = mem[2,1,3,3] +; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm20 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm20 = mem[2,2,2,3] ; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: # ymm11 = mem[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq $182, {{[-0-9]+}}(%r{{[sb]}}p), %ymm16 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm16 = mem[2,1,3,2] -; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm7 = mem[2,2,2,3] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm20 = ymm20[2,2,2,3] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm13 = ymm27[0,2,2,3] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm14 = ymm24[2,1,3,2] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm21[2,2,2,3] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm22, %zmm23, %zmm22 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm17, %zmm18, %zmm23 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm27 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm22, %zmm27, %zmm23 -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,1,3,2] -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm23, %ymm17, %ymm2 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535] -; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm2, %ymm17, %ymm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm17, %zmm2 # 64-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm1 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm1[4,5,6,7] -; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm17, %zmm3 # 64-byte Folded Reload +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm19 = ymm30[2,1,3,2] +; AVX512F-SLOW-NEXT: vpermq $234, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm1 = mem[2,2,2,3] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm25 = ymm22[2,2,2,3] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm13 = ymm21[0,2,2,3] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm16 = ymm16[2,1,3,2] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm18[2,2,2,3] +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm23, %zmm24, %zmm23 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm17, %zmm7, %zmm24 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm30 = [65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm23, %zmm30, %zmm24 +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,2] +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm24, %ymm7, %ymm0 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm18 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535] +; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm0, %ymm18, %ymm14 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm18, %zmm2 # 64-byte Folded Reload +; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm18, %zmm3 # 64-byte Folded Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm10, %zmm10 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm8, %zmm8 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm22 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm10, %zmm22, %zmm8 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm23 = [65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm10, %zmm23, %zmm8 ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm17 = [0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535] ; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm17, %zmm8 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm15, %zmm2 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm12, %zmm10 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm22, %zmm10 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm23, %zmm10 ; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm3, %zmm17, %zmm10 -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm2 -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm18 -; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm18 -; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm23, %ymm2 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogd $226, 124(%r8){1to8}, %ymm1, %ymm2 +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm2 +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm21 +; AVX512F-SLOW-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm21 +; AVX512F-SLOW-NEXT: vextracti64x4 $1, %zmm24, %ymm2 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogd $226, 124(%r8){1to8}, %ymm0, %ymm2 ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535] ; AVX512F-SLOW-NEXT: vpternlogq $184, %ymm2, %ymm3, %ymm29 -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $248, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm21 # 64-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm2 -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm1[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogq $248, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm22 # 64-byte Folded Reload +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm24 = zmm0[0,1,2,3],zmm6[0,1,2,3] ; AVX512F-SLOW-NEXT: vpternlogq $234, {{[-0-9]+}}(%r{{[sb]}}p), %zmm3, %zmm24 # 64-byte Folded Reload ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm29, %zmm0, %zmm2 -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 # 64-byte Reload -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0] -; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm0, %zmm1, %zmm23 +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm14[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogd $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm29 # 64-byte Folded Reload -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0] +; AVX512F-SLOW-NEXT: vpternlogd $184, %zmm2, %zmm0, %zmm29 +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogd $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm7 # 64-byte Folded Reload +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535] +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm18 # 64-byte Folded Reload ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm17 # 64-byte Folded Reload -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm6 # 64-byte Folded Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm11, %zmm19, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm16, %zmm2 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535] -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm3, %zmm2 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm20, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm14, %zmm4 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm3, %zmm4 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0] -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm2, %zmm0, %zmm19 +; AVX512F-SLOW-NEXT: vpternlogq $184, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm17 # 64-byte Folded Reload +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm11, %zmm20, %zmm2 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm19, %zmm3 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535,65535,65535,0,0,65535,65535,65535] +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm4, %zmm3 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm25, %zmm2 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm16, %zmm5 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm4, %zmm5 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0] ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm0, %zmm20 +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm2, %zmm20 +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm5, %zmm2, %zmm25 ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm31, %zmm0, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm26, %zmm30, %zmm2 -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm0, %zmm27, %zmm2 -; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm0 = mem[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm3 = mem[0,1,3,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm3 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm3 = mem[2,1,3,3] -; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm4 = mem[0,0,1,1] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm4 = mem[0,0,2,1] -; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm5 = mem[2,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,5,4] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,0,1,3] -; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm26 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm26 = mem[0,0,1,1] -; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm7 = mem[0,2,3,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,0,2,1] -; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm9 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm9 = mem[0,2,2,3] -; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm11 = mem[0,1,3,2,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[0,0,1,1] -; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm12 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm12 = mem[2,1,3,3] -; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm13 = mem[0,0,1,1] -; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload -; AVX512F-SLOW-NEXT: # ymm14 = mem[0,0,2,1] -; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Folded Reload -; AVX512F-SLOW-NEXT: # xmm15 = mem[2,1,2,3,4,5,6,7] -; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm15 = xmm15[0,1,2,3,4,5,5,4] -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm15 = ymm15[0,0,1,3] +; AVX512F-SLOW-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm27, %zmm31, %zmm3 +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm2, %zmm30, %zmm3 +; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm2 = mem[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Folded Reload +; AVX512F-SLOW-NEXT: # xmm4 = mem[0,1,3,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,1,1] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,1,1,3] +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm4 = mem[2,1,3,3] +; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm5 = mem[0,0,1,1] +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4 +; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm5 = mem[0,0,2,1] +; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload +; AVX512F-SLOW-NEXT: # xmm6 = mem[2,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,5,5,4] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,0,1,3] +; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm19 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm19 = mem[0,0,1,1] +; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload +; AVX512F-SLOW-NEXT: # xmm9 = mem[0,2,3,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,0,2,1] +; AVX512F-SLOW-NEXT: vpermq $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm11 = mem[0,2,2,3] +; AVX512F-SLOW-NEXT: vpshuflw $180, {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload +; AVX512F-SLOW-NEXT: # xmm12 = mem[0,1,3,2,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[0,0,1,1] +; AVX512F-SLOW-NEXT: vpermq $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm13 = mem[2,1,3,3] +; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm14 = mem[0,0,1,1] +; AVX512F-SLOW-NEXT: vpermq $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm15 # 32-byte Folded Reload +; AVX512F-SLOW-NEXT: # ymm15 = mem[0,0,2,1] +; AVX512F-SLOW-NEXT: vpshuflw $230, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVX512F-SLOW-NEXT: # xmm0 = mem[2,1,2,3,4,5,6,7] +; AVX512F-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4] +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3] ; AVX512F-SLOW-NEXT: vpermq $80, {{[-0-9]+}}(%r{{[sb]}}p), %ymm16 # 32-byte Folded Reload ; AVX512F-SLOW-NEXT: # ymm16 = mem[0,0,1,1] ; AVX512F-SLOW-NEXT: vpshuflw $248, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; AVX512F-SLOW-NEXT: # xmm1 = mem[0,2,3,3,4,5,6,7] ; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1] -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm27, %zmm3 -; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm11[0,1,1,3] -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm9, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm13, %zmm12, %zmm9 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm27, %zmm9 -; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535] -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm0, %zmm11 -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm9, %zmm0, %zmm25 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm0 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm26, %zmm3 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm22, %zmm3 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm15, %zmm14, %zmm0 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm30, %zmm4 +; AVX512F-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm12[0,1,1,3] +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm11, %zmm2 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm14, %zmm13, %zmm11 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm30, %zmm11 +; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535] +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm2, %zmm12 +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm11, %zmm2, %zmm26 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm6, %zmm5, %zmm2 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm9, %zmm19, %zmm4 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm2, %zmm23, %zmm4 +; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm15, %zmm0 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm16, %zmm1 -; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm22, %zmm1 +; AVX512F-SLOW-NEXT: vpternlogq $226, %zmm0, %zmm23, %zmm1 ; AVX512F-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0,0,65535,65535,65535,65535,0,0] -; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm4 # 64-byte Reload -; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm3, %zmm0, %zmm4 +; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm4, %zmm0, %zmm2 ; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm1, %zmm0, %zmm28 -; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm18, %zmm24 -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm29, %zmm0 -; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm21, %zmm24 +; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm7 ; AVX512F-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm24, 320(%rax) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm28, 256(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm25, 192(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm20, 128(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm26, 192(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm25, 128(%rax) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm10, 64(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm6, (%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, 448(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm4, 704(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm11, 640(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm19, 576(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm17, (%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm18, 448(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm2, 704(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm12, 640(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm20, 576(%rax) ; AVX512F-SLOW-NEXT: vmovdqa64 %zmm8, 512(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm0, 384(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm21, 768(%rax) -; AVX512F-SLOW-NEXT: vmovdqa64 %zmm23, 832(%rax) -; AVX512F-SLOW-NEXT: addq $2200, %rsp # imm = 0x898 +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm7, 384(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm22, 768(%rax) +; AVX512F-SLOW-NEXT: vmovdqa64 %zmm29, 832(%rax) +; AVX512F-SLOW-NEXT: addq $2168, %rsp # imm = 0x878 ; AVX512F-SLOW-NEXT: vzeroupper ; AVX512F-SLOW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll index d6cd027..b7baf7b 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll @@ -725,19 +725,17 @@ define void @store_i8_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero ; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512BW-SLOW-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2] -; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,1,3] -; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero -; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,0,2] ; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm0[0,2,1,3,4,6,5,7] +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zmm1[18],zero,zero,zero,zero,zero,zero,zmm1[19],zero,zero,zero,zero,zmm1[36,44],zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46,54],zero,zero,zero,zero,zero,zero,zmm1[55],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,0,2,5,7,4,6] ; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zmm0[1],zero,zero,zero,zero,zmm0[18,26],zero,zero,zero,zero,zero,zmm0[19,27],zero,zero,zero,zero,zero,zero,zero,zmm0[36],zero,zero,zero,zero,zero,zero,zmm0[37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero ; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm1 -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,0,1] -; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zero,zero,zmm2[0,8],zero,zero,zero,zero,zero,zmm2[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm2[19,27],zero,zero,zero,zero,zero,zmm2[20,28],zero,zero,zero,zero,zero,zero,zero,zmm2[37,45],zero,zero,zero,zero,zero,zmm2[38,46],zero,zero,zero,zmm2[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512BW-SLOW-NEXT: vporq %zmm1, %zmm2, %zmm1 +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[2,10,18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zero,zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zero,zero,zmm1[37,45],zero,zero,zero,zero,zero,zmm1[38,46],zero,zero,zero,zmm1[55,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-SLOW-NEXT: vporq %zmm2, %zmm1, %zmm1 ; AVX512BW-SLOW-NEXT: movabsq $63546854584629360, %rcx # imm = 0xE1C3870E1C3870 ; AVX512BW-SLOW-NEXT: kmovq %rcx, %k1 ; AVX512BW-SLOW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} @@ -7627,17 +7625,15 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm4 ; AVX512F-SLOW-NEXT: vporq %ymm15, %ymm18, %ymm5 ; AVX512F-SLOW-NEXT: vporq %ymm19, %ymm20, %ymm6 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm5 -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm6[0,1,2,3],zmm5[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm6[0,1,2,3],zmm5[0,1,2,3] ; AVX512F-SLOW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm16 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm16 ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload ; AVX512F-SLOW-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm26 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm17, %zmm26 ; AVX512F-SLOW-NEXT: vporq %ymm21, %ymm22, %ymm1 -; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 ; AVX512F-SLOW-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm5 # 64-byte Reload -; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[0,1,2,3] ; AVX512F-SLOW-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 ; AVX512F-SLOW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2 ; AVX512F-SLOW-NEXT: vpermq $68, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Folded Reload @@ -7706,9 +7702,9 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm7 = [128,128,128,128,128,14,128,128,128,128,128,128,15,128,128,128,128,128,128,16,128,128,128,128,128,128,17,128,128,128,128,128] ; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm1, %ymm0 ; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm26 -; AVX512F-FAST-NEXT: vmovdqa (%rdx), %ymm1 -; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,0,1,14,128,14,15,0,1,14,15,128,13,14,15,16,17,16,128,30,31,30,31,16,17,128,31,28,29,30,31] -; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm1, %ymm3 +; AVX512F-FAST-NEXT: vmovdqa (%rdx), %ymm2 +; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,0,1,14,128,14,15,0,1,14,15,128,13,14,15,16,17,16,128,30,31,30,31,16,17,128,31,28,29,30,31] +; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm2, %ymm3 ; AVX512F-FAST-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-FAST-NEXT: vmovdqa (%r8), %ymm0 @@ -7753,78 +7749,78 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-FAST-NEXT: vporq %xmm9, %xmm12, %xmm22 ; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm13, %ymm7 ; AVX512F-FAST-NEXT: vmovdqa64 %ymm13, %ymm20 -; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm14, %ymm2 -; AVX512F-FAST-NEXT: vpor %ymm7, %ymm2, %ymm2 -; AVX512F-FAST-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm14, %ymm1 +; AVX512F-FAST-NEXT: vpor %ymm7, %ymm1, %ymm1 +; AVX512F-FAST-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vmovdqa64 %ymm16, %ymm7 -; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,ymm7[14],zero,zero,zero,zero,zero,zero,ymm7[15],zero,zero,zero,zero,zero,zero,ymm7[16],zero,zero,zero,zero,zero,zero,ymm7[17],zero,zero,zero,zero,zero,zero,ymm7[18] +; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,ymm7[14],zero,zero,zero,zero,zero,zero,ymm7[15],zero,zero,zero,zero,zero,zero,ymm7[16],zero,zero,zero,zero,zero,zero,ymm7[17],zero,zero,zero,zero,zero,zero,ymm7[18] ; AVX512F-FAST-NEXT: vmovdqa64 %ymm17, %ymm7 ; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,14],zero,ymm7[12,13,0,1,14,15],zero,ymm7[3,12,13,2,3,16],zero,ymm7[30,31,28,29,16,17],zero,ymm7[31,18,19,28,29,18],zero -; AVX512F-FAST-NEXT: vpor %ymm2, %ymm7, %ymm2 -; AVX512F-FAST-NEXT: vmovdqu64 %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-FAST-NEXT: vmovdqa64 %ymm18, %ymm2 +; AVX512F-FAST-NEXT: vpor %ymm1, %ymm7, %ymm1 +; AVX512F-FAST-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; AVX512F-FAST-NEXT: vmovdqa64 %ymm18, %ymm1 ; AVX512F-FAST-NEXT: vmovdqu64 %ymm18, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-FAST-NEXT: vmovdqa64 %ymm25, %ymm7 -; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm2, %ymm2 +; AVX512F-FAST-NEXT: vpshufb %ymm7, %ymm1, %ymm1 ; AVX512F-FAST-NEXT: vmovdqa64 %ymm30, %ymm0 ; AVX512F-FAST-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-FAST-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX512F-FAST-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vmovdqa (%rsi), %xmm13 ; AVX512F-FAST-NEXT: vpshufb %xmm11, %xmm13, %xmm0 ; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm9 -; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm9, %xmm2 -; AVX512F-FAST-NEXT: vporq %xmm0, %xmm2, %xmm31 +; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm9, %xmm1 +; AVX512F-FAST-NEXT: vporq %xmm0, %xmm1, %xmm31 ; AVX512F-FAST-NEXT: vmovdqa (%rcx), %xmm14 ; AVX512F-FAST-NEXT: vpshufb %xmm8, %xmm14, %xmm0 ; AVX512F-FAST-NEXT: vmovdqa (%rdx), %xmm8 -; AVX512F-FAST-NEXT: vmovdqa64 %xmm19, %xmm2 -; AVX512F-FAST-NEXT: vpshufb %xmm2, %xmm8, %xmm2 -; AVX512F-FAST-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-FAST-NEXT: vmovdqa64 %xmm19, %xmm1 +; AVX512F-FAST-NEXT: vpshufb %xmm1, %xmm8, %xmm1 +; AVX512F-FAST-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512F-FAST-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-FAST-NEXT: vmovdqa (%r9), %xmm2 -; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm0 -; AVX512F-FAST-NEXT: vmovdqa %xmm2, %xmm3 -; AVX512F-FAST-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512F-FAST-NEXT: vmovdqa (%r9), %xmm1 +; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm1, %xmm0 +; AVX512F-FAST-NEXT: vmovdqa %xmm1, %xmm3 +; AVX512F-FAST-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512F-FAST-NEXT: vmovdqa (%r8), %xmm4 -; AVX512F-FAST-NEXT: vmovdqa64 %xmm21, %xmm2 -; AVX512F-FAST-NEXT: vpshufb %xmm2, %xmm4, %xmm2 +; AVX512F-FAST-NEXT: vmovdqa64 %xmm21, %xmm1 +; AVX512F-FAST-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; AVX512F-FAST-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX512F-FAST-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-FAST-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm0[23],zero,zero,zero,zero,ymm0[26],zero,ymm0[24],zero,zero,zero,zero,ymm0[27],zero,ymm0[25] ; AVX512F-FAST-NEXT: vmovdqa64 %ymm23, %ymm12 -; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm12[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm12[21],zero,ymm12[19],zero,zero,zero,zero,ymm12[22],zero,ymm12[20],zero,zero -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm12[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm12[21],zero,ymm12[19],zero,zero,zero,zero,ymm12[22],zero,ymm12[20],zero,zero +; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vmovdqa64 %ymm26, %ymm11 ; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,ymm11[25],zero,ymm11[23],zero,zero,zero,zero,ymm11[26],zero,ymm11[24],zero,zero,zero,zero -; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm11[18],zero,zero,zero,zero,ymm11[21],zero,ymm11[19],zero,zero,zero,zero,ymm11[22],zero,ymm11[20] -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm11[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm11[18],zero,zero,zero,zero,ymm11[21],zero,ymm11[19],zero,zero,zero,zero,ymm11[22],zero,ymm11[20] +; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27,24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27] -; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1] +; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27,24,25,128,23,128,21,22,23,26,128,24,128,28,29,26,27] +; AVX512F-FAST-NEXT: # ymm1 = mem[0,1,0,1] ; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [18,128,18,19,20,21,128,19,128,25,26,27,22,128,20,128,18,128,18,19,20,21,128,19,128,25,26,27,22,128,20,128] ; AVX512F-FAST-NEXT: # ymm5 = mem[0,1,0,1] -; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm1, %ymm0 -; AVX512F-FAST-NEXT: vmovdqa64 %ymm2, %ymm19 -; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm1, %ymm2 +; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm2, %ymm0 +; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm19 +; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm1 ; AVX512F-FAST-NEXT: vmovdqa64 %ymm5, %ymm30 -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm24, %zmm0, %zmm0 -; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] +; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = <u,6,7,2,3,u,u,u,8,9,4,5,u,u,u,10> -; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,0,1],zmm0[4,5,6,7] +; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[4,5,6,7] ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill -; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,3,3,2,2,3,3] -; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1] +; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,3,3,2,2,3,3] +; AVX512F-FAST-NEXT: # ymm1 = mem[0,1,0,1] ; AVX512F-FAST-NEXT: vmovdqa (%rax), %xmm0 ; AVX512F-FAST-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,6] -; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0 +; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512F-FAST-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-FAST-NEXT: vmovdqa (%rax), %ymm4 ; AVX512F-FAST-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill @@ -7832,52 +7828,52 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm4, %ymm4 ; AVX512F-FAST-NEXT: vmovdqa64 %ymm5, %ymm18 ; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm24 -; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[30],zero,ymm1[28],zero,zero,zero,zero,ymm1[31],zero,ymm1[29],zero,zero +; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[30],zero,ymm2[28],zero,zero,zero,zero,ymm2[31],zero,ymm2[29],zero,zero ; AVX512F-FAST-NEXT: vmovdqa64 %ymm0, %ymm23 -; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14] +; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14] ; AVX512F-FAST-NEXT: vmovdqu (%rsp), %ymm0 # 32-byte Reload -; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512F-FAST-NEXT: vmovdqa64 %ymm1, %ymm25 +; AVX512F-FAST-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; AVX512F-FAST-NEXT: vmovdqa64 %ymm2, %ymm25 ; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm1[27],zero,zero,zero,zero,ymm1[30],zero,ymm1[28],zero,zero,zero,zero,ymm1[31],zero,ymm1[29] -; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3] +; AVX512F-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX512F-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero,ymm2[27],zero,zero,zero,zero,ymm2[30],zero,ymm2[28],zero,zero,zero,zero,ymm2[31],zero,ymm2[29] +; AVX512F-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,3] ; AVX512F-FAST-NEXT: vbroadcasti32x4 {{.*#+}} ymm26 = [18374967954648269055,71777218572844800,18374967954648269055,71777218572844800] ; AVX512F-FAST-NEXT: # ymm26 = mem[0,1,2,3,0,1,2,3] -; AVX512F-FAST-NEXT: vpternlogq $248, %ymm26, %ymm0, %ymm1 +; AVX512F-FAST-NEXT: vpternlogq $248, %ymm26, %ymm0, %ymm2 ; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm10[0],xmm15[0],xmm10[1],xmm15[1],xmm10[2],xmm15[2],xmm10[3],xmm15[3],xmm10[4],xmm15[4],xmm10[5],xmm15[5],xmm10[6],xmm15[6],xmm10[7],xmm15[7] ; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,4,5,0,1,u,u,u,6,7,2,3,u,u,u] -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm1[0,1,2,3],zmm0[0,1,0,1] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm21 = zmm2[0,1,2,3],zmm0[0,1,0,1] ; AVX512F-FAST-NEXT: vmovdqa64 %xmm29, %xmm0 -; AVX512F-FAST-NEXT: vmovdqa64 %xmm28, %xmm1 -; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm14[8],xmm8[8],xmm14[9],xmm8[9],xmm14[10],xmm8[10],xmm14[11],xmm8[11],xmm14[12],xmm8[12],xmm14[13],xmm8[13],xmm14[14],xmm8[14],xmm14[15],xmm8[15] +; AVX512F-FAST-NEXT: vmovdqa64 %xmm28, %xmm2 +; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm14[8],xmm8[8],xmm14[9],xmm8[9],xmm14[10],xmm8[10],xmm14[11],xmm8[11],xmm14[12],xmm8[12],xmm14[13],xmm8[13],xmm14[14],xmm8[14],xmm14[15],xmm8[15] ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm4 = <6,3,2,u,u,u,9,8,5,4,u,u,u,11,10,7> -; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm1, %xmm1 -; AVX512F-FAST-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX512F-FAST-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-FAST-NEXT: vpshufb %xmm4, %xmm0, %xmm0 -; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload -; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm0, %zmm1, %zmm28 +; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Reload +; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm28 ; AVX512F-FAST-NEXT: vmovdqa64 %xmm27, %xmm0 -; AVX512F-FAST-NEXT: vmovdqa %xmm6, %xmm1 +; AVX512F-FAST-NEXT: vmovdqa %xmm6, %xmm2 ; AVX512F-FAST-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] -; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] ; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm13[8],xmm9[8],xmm13[9],xmm9[9],xmm13[10],xmm9[10],xmm13[11],xmm9[11],xmm13[12],xmm9[12],xmm13[13],xmm9[13],xmm13[14],xmm9[14],xmm13[15],xmm9[15] ; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = <2,u,u,u,9,8,5,4,u,u,u,11,10,7,6,u> ; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm4, %xmm0 ; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm1, %xmm1 +; AVX512F-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm2 ; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload -; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm27 -; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm10[8],xmm15[8],xmm10[9],xmm15[9],xmm10[10],xmm15[10],xmm10[11],xmm15[11],xmm10[12],xmm15[12],xmm10[13],xmm15[13],xmm10[14],xmm15[14],xmm10[15],xmm15[15] -; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm22[0,1,0,1],zmm1[0,1,0,1] +; AVX512F-FAST-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm27 +; AVX512F-FAST-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm10[8],xmm15[8],xmm10[9],xmm15[9],xmm10[10],xmm15[10],xmm10[11],xmm15[11],xmm10[12],xmm15[12],xmm10[13],xmm15[13],xmm10[14],xmm15[14],xmm10[15],xmm15[15] +; AVX512F-FAST-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm22[0,1,0,1],zmm2[0,1,0,1] ; AVX512F-FAST-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill ; AVX512F-FAST-NEXT: vmovdqa 32(%rax), %xmm0 -; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,5,6] +; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5,5,6] ; AVX512F-FAST-NEXT: vmovdqa64 %xmm0, %xmm29 -; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm0 +; AVX512F-FAST-NEXT: vpermd %ymm2, %ymm1, %ymm0 ; AVX512F-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12,11,0,0,0,15,14,13,12] ; AVX512F-FAST-NEXT: vpshufb %ymm1, %ymm11, %ymm5 @@ -8065,9 +8061,8 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-FAST-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 ; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm8, %zmm0 ; AVX512F-FAST-NEXT: vpor %ymm12, %ymm15, %ymm2 -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 ; AVX512F-FAST-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[0,1,2,3] ; AVX512F-FAST-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm16 ; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm16 ; AVX512F-FAST-NEXT: vpermq $68, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2 # 64-byte Folded Reload @@ -8079,8 +8074,7 @@ define void @store_i8_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm5, %zmm2 ; AVX512F-FAST-NEXT: vpor %ymm1, %ymm13, %ymm1 ; AVX512F-FAST-NEXT: vpor %ymm11, %ymm14, %ymm5 -; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 -; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-FAST-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm5[0,1,2,3],zmm1[0,1,2,3] ; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm4, %zmm20, %zmm4 ; AVX512F-FAST-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm4 ; AVX512F-FAST-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm9, %zmm4 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll index 257c9a2..7bb3874 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll @@ -572,45 +572,45 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512F-SLOW-NEXT: vzeroupper ; AVX512F-SLOW-NEXT: retq ; -; AVX512-FAST-LABEL: store_i8_stride8_vf8: -; AVX512-FAST: # %bb.0: -; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero -; AVX512-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] -; AVX512-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 -; AVX512-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,3,5,7,1,3,5,7] -; AVX512-FAST-NEXT: # ymm2 = mem[0,1,0,1] -; AVX512-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm3 -; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15> -; AVX512-FAST-NEXT: vpshufb %ymm4, %ymm3, %ymm3 -; AVX512-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2 -; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15,u,u,u,u> -; AVX512-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm2 -; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7] -; AVX512-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4,6,0,2,4,6] -; AVX512-FAST-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512-FAST-NEXT: vpermd %ymm1, %ymm3, %ymm1 -; AVX512-FAST-NEXT: vpshufb %ymm4, %ymm1, %ymm1 -; AVX512-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0 -; AVX512-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm0 -; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512-FAST-NEXT: vmovdqa64 %zmm0, (%rax) -; AVX512-FAST-NEXT: vzeroupper -; AVX512-FAST-NEXT: retq +; AVX512F-FAST-LABEL: store_i8_stride8_vf8: +; AVX512F-FAST: # %bb.0: +; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; AVX512F-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero +; AVX512F-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512F-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 +; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,3,5,7,1,3,5,7] +; AVX512F-FAST-NEXT: # ymm2 = mem[0,1,0,1] +; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm3 +; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15> +; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm3, %ymm3 +; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2 +; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,2,6,10,14,u,u,u,u,3,7,11,15,u,u,u,u> +; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm2, %ymm2 +; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7] +; AVX512F-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4,6,0,2,4,6] +; AVX512F-FAST-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-FAST-NEXT: vpermd %ymm1, %ymm3, %ymm1 +; AVX512F-FAST-NEXT: vpshufb %ymm4, %ymm1, %ymm1 +; AVX512F-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0 +; AVX512F-FAST-NEXT: vpshufb %ymm5, %ymm0, %ymm0 +; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] +; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-FAST-NEXT: vmovdqa64 %zmm0, (%rax) +; AVX512F-FAST-NEXT: vzeroupper +; AVX512F-FAST-NEXT: retq ; ; AVX512BW-SLOW-LABEL: store_i8_stride8_vf8: ; AVX512BW-SLOW: # %bb.0: @@ -629,32 +629,61 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero ; AVX512BW-SLOW-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero ; AVX512BW-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512BW-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,u,u,22,30,u,u,u,u,u,u,23,31] -; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm1[2,3,0,1] -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u] -; AVX512BW-SLOW-NEXT: movw $17544, %cx # imm = 0x4488 +; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm1 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u] +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u,u,u] +; AVX512BW-SLOW-NEXT: movl $287445282, %ecx # imm = 0x11221122 ; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1 -; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm4, %ymm2 {%k1} -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm0[4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u,u,u] -; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} ymm5 = ymm0[2,3,0,1] -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm6 = ymm5[u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,22,30,u,u,u,u,u,u,23,31,u,u,u,u,u,u] -; AVX512BW-SLOW-NEXT: movw $4386, %cx # imm = 0x1122 -; AVX512BW-SLOW-NEXT: kmovd %ecx, %k2 -; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm6, %ymm4 {%k2} -; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7] -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27] -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u] -; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm3, %ymm1 {%k1} -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u] -; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm5[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u] -; AVX512BW-SLOW-NEXT: vmovdqu16 %ymm3, %ymm0 {%k2} -; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rax) +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[2,3,0,1,2,3,0,1] +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u] +; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3,0,1,2,3] +; AVX512BW-SLOW-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63] +; AVX512BW-SLOW-NEXT: movl $1149781128, %ecx # imm = 0x44884488 +; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1 +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} +; AVX512BW-SLOW-NEXT: movw $-21846, %cx # imm = 0xAAAA +; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1 +; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm1, (%rax) ; AVX512BW-SLOW-NEXT: vzeroupper ; AVX512BW-SLOW-NEXT: retq +; +; AVX512BW-FAST-LABEL: store_i8_stride8_vf8: +; AVX512BW-FAST: # %bb.0: +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero +; AVX512BW-FAST-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero +; AVX512BW-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512BW-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 +; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,10,12,14,8,10,12,14,9,11,13,15,9,11,13,15] +; AVX512BW-FAST-NEXT: vpermd %zmm1, %zmm2, %zmm1 +; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[u,u,u,u,0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,18,22,26,30,u,u,u,u,19,23,27,31,u,u,u,u,32,36,40,44,u,u,u,u,33,37,41,45,u,u,u,u,50,54,58,62,u,u,u,u,51,55,59,63] +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,4,6,0,2,4,6,1,3,5,7,1,3,5,7] +; AVX512BW-FAST-NEXT: vpermd %zmm0, %zmm2, %zmm0 +; AVX512BW-FAST-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,4,8,12,u,u,u,u,1,5,9,13,u,u,u,u,18,22,26,30,u,u,u,u,19,23,27,31,u,u,u,u,32,36,40,44,u,u,u,u,33,37,41,45,u,u,u,u,50,54,58,62,u,u,u,u,51,55,59,63,u,u,u,u] +; AVX512BW-FAST-NEXT: movw $-21846, %cx # imm = 0xAAAA +; AVX512BW-FAST-NEXT: kmovd %ecx, %k1 +; AVX512BW-FAST-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, (%rax) +; AVX512BW-FAST-NEXT: vzeroupper +; AVX512BW-FAST-NEXT: retq %in.vec0 = load <8 x i8>, ptr %in.vecptr0, align 64 %in.vec1 = load <8 x i8>, ptr %in.vecptr1, align 64 %in.vec2 = load <8 x i8>, ptr %in.vecptr2, align 64 @@ -1051,69 +1080,182 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX2-ONLY-NEXT: vzeroupper ; AVX2-ONLY-NEXT: retq ; -; AVX512-LABEL: store_i8_stride8_vf16: -; AVX512: # %bb.0: -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa (%rdx), %xmm1 -; AVX512-NEXT: vmovdqa (%r8), %xmm2 -; AVX512-NEXT: vmovdqa (%r11), %xmm3 -; AVX512-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 -; AVX512-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 -; AVX512-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 -; AVX512-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3 -; AVX512-NEXT: vpermq {{.*#+}} ymm4 = ymm3[0,2,0,2] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm5 = <u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15> -; AVX512-NEXT: vpshufb %ymm5, %ymm4, %ymm6 -; AVX512-NEXT: vpermq {{.*#+}} ymm7 = ymm2[0,2,0,2] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm8 = <u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u> -; AVX512-NEXT: vpshufb %ymm8, %ymm7, %ymm9 -; AVX512-NEXT: vpblendw {{.*#+}} ymm6 = ymm9[0,1,2],ymm6[3],ymm9[4,5,6],ymm6[7],ymm9[8,9,10],ymm6[11],ymm9[12,13,14],ymm6[15] -; AVX512-NEXT: vpermq {{.*#+}} ymm9 = ymm1[0,2,0,2] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm10 = <u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u> -; AVX512-NEXT: vpshufb %ymm10, %ymm9, %ymm11 -; AVX512-NEXT: vpermq {{.*#+}} ymm12 = ymm0[0,2,0,2] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm13 = <4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %ymm13, %ymm12, %ymm14 -; AVX512-NEXT: vpblendw {{.*#+}} ymm11 = ymm14[0],ymm11[1],ymm14[2,3,4],ymm11[5],ymm14[6,7,8],ymm11[9],ymm14[10,11,12],ymm11[13],ymm14[14,15] -; AVX512-NEXT: vpblendd {{.*#+}} ymm6 = ymm11[0],ymm6[1],ymm11[2],ymm6[3],ymm11[4],ymm6[5],ymm11[6],ymm6[7] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11> -; AVX512-NEXT: vpshufb %ymm11, %ymm4, %ymm4 -; AVX512-NEXT: vmovdqa {{.*#+}} ymm14 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u> -; AVX512-NEXT: vpshufb %ymm14, %ymm7, %ymm7 -; AVX512-NEXT: vpblendw {{.*#+}} ymm4 = ymm7[0,1,2],ymm4[3],ymm7[4,5,6],ymm4[7],ymm7[8,9,10],ymm4[11],ymm7[12,13,14],ymm4[15] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm7 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u> -; AVX512-NEXT: vpshufb %ymm7, %ymm9, %ymm9 -; AVX512-NEXT: vmovdqa {{.*#+}} ymm15 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %ymm15, %ymm12, %ymm12 -; AVX512-NEXT: vpblendw {{.*#+}} ymm9 = ymm12[0],ymm9[1],ymm12[2,3,4],ymm9[5],ymm12[6,7,8],ymm9[9],ymm12[10,11,12],ymm9[13],ymm12[14,15] -; AVX512-NEXT: vpblendd {{.*#+}} ymm4 = ymm9[0],ymm4[1],ymm9[2],ymm4[3],ymm9[4],ymm4[5],ymm9[6],ymm4[7] -; AVX512-NEXT: vinserti64x4 $1, %ymm6, %zmm4, %zmm4 -; AVX512-NEXT: vpermq {{.*#+}} ymm3 = ymm3[1,3,1,3] -; AVX512-NEXT: vpshufb %ymm5, %ymm3, %ymm5 -; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] -; AVX512-NEXT: vpshufb %ymm8, %ymm2, %ymm6 -; AVX512-NEXT: vpblendw {{.*#+}} ymm5 = ymm6[0,1,2],ymm5[3],ymm6[4,5,6],ymm5[7],ymm6[8,9,10],ymm5[11],ymm6[12,13,14],ymm5[15] -; AVX512-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3] -; AVX512-NEXT: vpshufb %ymm10, %ymm1, %ymm6 -; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3] -; AVX512-NEXT: vpshufb %ymm13, %ymm0, %ymm8 -; AVX512-NEXT: vpblendw {{.*#+}} ymm6 = ymm8[0],ymm6[1],ymm8[2,3,4],ymm6[5],ymm8[6,7,8],ymm6[9],ymm8[10,11,12],ymm6[13],ymm8[14,15] -; AVX512-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7] -; AVX512-NEXT: vpshufb %ymm11, %ymm3, %ymm3 -; AVX512-NEXT: vpshufb %ymm14, %ymm2, %ymm2 -; AVX512-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm3[3],ymm2[4,5,6],ymm3[7],ymm2[8,9,10],ymm3[11],ymm2[12,13,14],ymm3[15] -; AVX512-NEXT: vpshufb %ymm7, %ymm1, %ymm1 -; AVX512-NEXT: vpshufb %ymm15, %ymm0, %ymm0 -; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4],ymm1[5],ymm0[6,7,8],ymm1[9],ymm0[10,11,12],ymm1[13],ymm0[14,15] -; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] -; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rax) -; AVX512-NEXT: vmovdqa64 %zmm4, (%rax) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: store_i8_stride8_vf16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovdqa (%rdx), %xmm1 +; AVX512F-NEXT: vmovdqa (%r8), %xmm2 +; AVX512F-NEXT: vmovdqa (%r11), %xmm3 +; AVX512F-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 +; AVX512F-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 +; AVX512F-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 +; AVX512F-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3 +; AVX512F-NEXT: vpermq {{.*#+}} ymm4 = ymm3[0,2,0,2] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = <u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15> +; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm6 +; AVX512F-NEXT: vpermq {{.*#+}} ymm7 = ymm2[0,2,0,2] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = <u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u> +; AVX512F-NEXT: vpshufb %ymm8, %ymm7, %ymm9 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm6 = ymm9[0,1,2],ymm6[3],ymm9[4,5,6],ymm6[7],ymm9[8,9,10],ymm6[11],ymm9[12,13,14],ymm6[15] +; AVX512F-NEXT: vpermq {{.*#+}} ymm9 = ymm1[0,2,0,2] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm10 = <u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u> +; AVX512F-NEXT: vpshufb %ymm10, %ymm9, %ymm11 +; AVX512F-NEXT: vpermq {{.*#+}} ymm12 = ymm0[0,2,0,2] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm13 = <4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u> +; AVX512F-NEXT: vpshufb %ymm13, %ymm12, %ymm14 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm11 = ymm14[0],ymm11[1],ymm14[2,3,4],ymm11[5],ymm14[6,7,8],ymm11[9],ymm14[10,11,12],ymm11[13],ymm14[14,15] +; AVX512F-NEXT: vpblendd {{.*#+}} ymm6 = ymm11[0],ymm6[1],ymm11[2],ymm6[3],ymm11[4],ymm6[5],ymm11[6],ymm6[7] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm11 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11> +; AVX512F-NEXT: vpshufb %ymm11, %ymm4, %ymm4 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm14 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u> +; AVX512F-NEXT: vpshufb %ymm14, %ymm7, %ymm7 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm7[0,1,2],ymm4[3],ymm7[4,5,6],ymm4[7],ymm7[8,9,10],ymm4[11],ymm7[12,13,14],ymm4[15] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u> +; AVX512F-NEXT: vpshufb %ymm7, %ymm9, %ymm9 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm15 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u> +; AVX512F-NEXT: vpshufb %ymm15, %ymm12, %ymm12 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm9 = ymm12[0],ymm9[1],ymm12[2,3,4],ymm9[5],ymm12[6,7,8],ymm9[9],ymm12[10,11,12],ymm9[13],ymm12[14,15] +; AVX512F-NEXT: vpblendd {{.*#+}} ymm4 = ymm9[0],ymm4[1],ymm9[2],ymm4[3],ymm9[4],ymm4[5],ymm9[6],ymm4[7] +; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm4, %zmm4 +; AVX512F-NEXT: vpermq {{.*#+}} ymm3 = ymm3[1,3,1,3] +; AVX512F-NEXT: vpshufb %ymm5, %ymm3, %ymm5 +; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512F-NEXT: vpshufb %ymm8, %ymm2, %ymm6 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm5 = ymm6[0,1,2],ymm5[3],ymm6[4,5,6],ymm5[7],ymm6[8,9,10],ymm5[11],ymm6[12,13,14],ymm5[15] +; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3] +; AVX512F-NEXT: vpshufb %ymm10, %ymm1, %ymm6 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3] +; AVX512F-NEXT: vpshufb %ymm13, %ymm0, %ymm8 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm6 = ymm8[0],ymm6[1],ymm8[2,3,4],ymm6[5],ymm8[6,7,8],ymm6[9],ymm8[10,11,12],ymm6[13],ymm8[14,15] +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7] +; AVX512F-NEXT: vpshufb %ymm11, %ymm3, %ymm3 +; AVX512F-NEXT: vpshufb %ymm14, %ymm2, %ymm2 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2],ymm3[3],ymm2[4,5,6],ymm3[7],ymm2[8,9,10],ymm3[11],ymm2[12,13,14],ymm3[15] +; AVX512F-NEXT: vpshufb %ymm7, %ymm1, %ymm1 +; AVX512F-NEXT: vpshufb %ymm15, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4],ymm1[5],ymm0[6,7,8],ymm1[9],ymm0[10,11,12],ymm1[13],ymm0[14,15] +; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa64 %zmm0, 64(%rax) +; AVX512F-NEXT: vmovdqa64 %zmm4, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-SLOW-LABEL: store_i8_stride8_vf16: +; AVX512BW-SLOW: # %bb.0: +; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512BW-SLOW-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512BW-SLOW-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512BW-SLOW-NEXT: vmovdqa (%rdx), %xmm1 +; AVX512BW-SLOW-NEXT: vmovdqa (%r8), %xmm2 +; AVX512BW-SLOW-NEXT: vmovdqa (%r11), %xmm3 +; AVX512BW-SLOW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 +; AVX512BW-SLOW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 +; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 +; AVX512BW-SLOW-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3 +; AVX512BW-SLOW-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 +; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm3 +; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm4 = zmm2[0,2,0,2,4,6,4,6] +; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm5 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u> +; AVX512BW-SLOW-NEXT: vpshufb %zmm5, %zmm4, %zmm4 +; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[4,5,6,7,4,5,6,7] +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm6 = zmm3[0,2,0,2,4,6,4,6] +; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm7 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15> +; AVX512BW-SLOW-NEXT: vpshufb %zmm7, %zmm6, %zmm6 +; AVX512BW-SLOW-NEXT: movl $8913032, %ecx # imm = 0x880088 +; AVX512BW-SLOW-NEXT: kmovd %ecx, %k1 +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm6, %zmm4 {%k1} +; AVX512BW-SLOW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6] +; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm8 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u> +; AVX512BW-SLOW-NEXT: vpshufb %zmm8, %zmm6, %zmm6 +; AVX512BW-SLOW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[4,5,6,7,4,5,6,7] +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm9 = zmm1[0,2,0,2,4,6,4,6] +; AVX512BW-SLOW-NEXT: vmovdqa64 {{.*#+}} zmm10 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u> +; AVX512BW-SLOW-NEXT: vpshufb %zmm10, %zmm9, %zmm9 +; AVX512BW-SLOW-NEXT: movl $2228258, %ecx # imm = 0x220022 +; AVX512BW-SLOW-NEXT: kmovd %ecx, %k2 +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm9, %zmm6 {%k2} +; AVX512BW-SLOW-NEXT: movw $-21846, %cx # imm = 0xAAAA +; AVX512BW-SLOW-NEXT: kmovd %ecx, %k3 +; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm4, %zmm6 {%k3} +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm2 = zmm2[1,3,1,3,5,7,5,7] +; AVX512BW-SLOW-NEXT: vpshufb %zmm5, %zmm2, %zmm2 +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm3 = zmm3[1,3,1,3,5,7,5,7] +; AVX512BW-SLOW-NEXT: vpshufb %zmm7, %zmm3, %zmm3 +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm3, %zmm2 {%k1} +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,1,3,5,7,5,7] +; AVX512BW-SLOW-NEXT: vpshufb %zmm8, %zmm0, %zmm0 +; AVX512BW-SLOW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[1,3,1,3,5,7,5,7] +; AVX512BW-SLOW-NEXT: vpshufb %zmm10, %zmm1, %zmm1 +; AVX512BW-SLOW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k2} +; AVX512BW-SLOW-NEXT: vmovdqa32 %zmm2, %zmm0 {%k3} +; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, 64(%rax) +; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm6, (%rax) +; AVX512BW-SLOW-NEXT: vzeroupper +; AVX512BW-SLOW-NEXT: retq +; +; AVX512BW-FAST-LABEL: store_i8_stride8_vf16: +; AVX512BW-FAST: # %bb.0: +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512BW-FAST-NEXT: vmovdqa (%rdx), %xmm1 +; AVX512BW-FAST-NEXT: vmovdqa (%r8), %xmm2 +; AVX512BW-FAST-NEXT: vmovdqa (%r11), %xmm3 +; AVX512BW-FAST-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 +; AVX512BW-FAST-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 +; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm4 +; AVX512BW-FAST-NEXT: vinserti128 $1, (%r10), %ymm3, %ymm3 +; AVX512BW-FAST-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 +; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5 +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,2,0,2,12,14,12,14] +; AVX512BW-FAST-NEXT: vpermt2q %zmm5, %zmm6, %zmm3 +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm7 = <u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15> +; AVX512BW-FAST-NEXT: vpshufb %zmm7, %zmm3, %zmm3 +; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm8 = zmm2[0,2,0,2,4,6,4,6] +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm9 = <u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u> +; AVX512BW-FAST-NEXT: vpshufb %zmm9, %zmm8, %zmm8 +; AVX512BW-FAST-NEXT: movl $8913032, %ecx # imm = 0x880088 +; AVX512BW-FAST-NEXT: kmovd %ecx, %k1 +; AVX512BW-FAST-NEXT: vmovdqu16 %zmm3, %zmm8 {%k1} +; AVX512BW-FAST-NEXT: vpermt2q %zmm4, %zmm6, %zmm1 +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm3 = <u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u> +; AVX512BW-FAST-NEXT: vpshufb %zmm3, %zmm1, %zmm1 +; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6] +; AVX512BW-FAST-NEXT: vmovdqa64 {{.*#+}} zmm10 = <0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15,u,u,u,u,u,u> +; AVX512BW-FAST-NEXT: vpshufb %zmm10, %zmm6, %zmm6 +; AVX512BW-FAST-NEXT: movl $2228258, %ecx # imm = 0x220022 +; AVX512BW-FAST-NEXT: kmovd %ecx, %k2 +; AVX512BW-FAST-NEXT: vmovdqu16 %zmm1, %zmm6 {%k2} +; AVX512BW-FAST-NEXT: movw $-21846, %cx # imm = 0xAAAA +; AVX512BW-FAST-NEXT: kmovd %ecx, %k3 +; AVX512BW-FAST-NEXT: vmovdqa32 %zmm8, %zmm6 {%k3} +; AVX512BW-FAST-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [5,7,5,7,5,7,5,7] +; AVX512BW-FAST-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-FAST-NEXT: vpermq %zmm5, %zmm1, %zmm5 +; AVX512BW-FAST-NEXT: vpshufb %zmm7, %zmm5, %zmm5 +; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm2 = zmm2[1,3,1,3,5,7,5,7] +; AVX512BW-FAST-NEXT: vpshufb %zmm9, %zmm2, %zmm2 +; AVX512BW-FAST-NEXT: vmovdqu16 %zmm5, %zmm2 {%k1} +; AVX512BW-FAST-NEXT: vpermq %zmm4, %zmm1, %zmm1 +; AVX512BW-FAST-NEXT: vpshufb %zmm3, %zmm1, %zmm1 +; AVX512BW-FAST-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,3,1,3,5,7,5,7] +; AVX512BW-FAST-NEXT: vpshufb %zmm10, %zmm0, %zmm0 +; AVX512BW-FAST-NEXT: vmovdqu16 %zmm1, %zmm0 {%k2} +; AVX512BW-FAST-NEXT: vmovdqa32 %zmm2, %zmm0 {%k3} +; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, 64(%rax) +; AVX512BW-FAST-NEXT: vmovdqa64 %zmm6, (%rax) +; AVX512BW-FAST-NEXT: vzeroupper +; AVX512BW-FAST-NEXT: retq %in.vec0 = load <16 x i8>, ptr %in.vecptr0, align 64 %in.vec1 = load <16 x i8>, ptr %in.vecptr1, align 64 %in.vec2 = load <16 x i8>, ptr %in.vecptr2, align 64 @@ -6988,6 +7130,8 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; AVX1: {{.*}} +; AVX512: {{.*}} +; AVX512-FAST: {{.*}} ; AVX512-SLOW: {{.*}} ; AVX512BW: {{.*}} ; AVX512BW-ONLY: {{.*}} @@ -6997,7 +7141,6 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQBW-FAST: {{.*}} ; AVX512DQBW-ONLY: {{.*}} ; AVX512DQBW-SLOW: {{.*}} -; AVX512F: {{.*}} ; AVX512F-ONLY: {{.*}} ; FALLBACK0: {{.*}} ; FALLBACK1: {{.*}} diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll index 86e8782..a7b1fa2 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll @@ -5684,10 +5684,9 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in ; ; AVX512BW-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[0],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[16],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[32],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero,zmm0[48],zero ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) @@ -5797,10 +5796,9 @@ define void @vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16(ptr %in ; ; AVX512BW-LABEL: vec512_i8_widen_to_i32_factor4_broadcast_to_v16i32_factor16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[0],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[16],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[32],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero,zmm0[48],zero,zero,zero ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) @@ -5910,10 +5908,9 @@ define void @vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8(ptr %in.v ; ; AVX512BW-LABEL: vec512_i8_widen_to_i64_factor8_broadcast_to_v8i64_factor8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[0],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[16],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[32],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero,zmm0[48],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) @@ -6004,10 +6001,9 @@ define void @vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512BW-LABEL: vec512_i8_widen_to_i128_factor16_broadcast_to_v4i128_factor4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) @@ -6211,12 +6207,12 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero -; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63] +; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -6330,12 +6326,12 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512BW-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,0,37,38,39,0,41,42,43,0,45,46,47,0,49,50,51,0,53,54,55,0,57,58,59,0,61,62,63] +; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -6449,12 +6445,12 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vpaddb (%rsi), %ymm0, %ymm0 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,33,34,35,36,37,38,39,0,41,42,43,44,45,46,47,0,49,50,51,52,53,54,55,0,57,58,59,60,61,62,63] +; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll new file mode 100644 index 0000000..c4b209d --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll @@ -0,0 +1,35 @@ +; RUN: opt %s -S -passes=declare-to-assign -o - | FileCheck %s + +; CHECK: call void @llvm.dbg.declare + +define dso_local void @f() sanitize_hwaddress !dbg !9 { +entry: + %a = alloca i32, align 4 + call void @llvm.dbg.declare(metadata ptr %a, metadata !13, metadata !DIExpression()), !dbg !16 + ret void, !dbg !17 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 17.0.0"} +!9 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!10 = !DISubroutineType(types: !11) +!11 = !{null} +!12 = !{!13} +!13 = !DILocalVariable(name: "a", scope: !9, file: !1, line: 1, type: !14) +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!15 = !DILocation(line: 1, column: 12, scope: !9) +!16 = !DILocation(line: 1, column: 16, scope: !9) +!17 = !DILocation(line: 1, column: 19, scope: !9) diff --git a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll new file mode 100644 index 0000000..9731771 --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1.ll @@ -0,0 +1,16 @@ +define i32 @sub1(i32 %0) { + %2 = add i32 %0, -1 + ret i32 %2 +} + +define i32 @main(i32 %0) { + %2 = call i32 @sub1(i32 %0) + ret i32 %2 +} + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang 18.0.0git", emissionKind: FullDebug) +!2 = !DIFile(filename: "argc_sub1.c", directory: ".") diff --git a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll b/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll deleted file mode 100644 index 0cdc5e7..0000000 --- a/llvm/test/Examples/OrcV2Examples/Inputs/argc_sub1_elf.ll +++ /dev/null @@ -1,51 +0,0 @@ -; ModuleID = 'argc_sub1.c' - -define i32 @sub1(i32) !dbg !8 { - call void @llvm.dbg.value(metadata i32 %0, metadata !13, metadata !DIExpression()), !dbg !14 - %2 = add nsw i32 %0, -1, !dbg !15 - ret i32 %2, !dbg !16 -} - -define i32 @main(i32, i8** nocapture readnone) !dbg !17 { - call void @llvm.dbg.value(metadata i32 %0, metadata !24, metadata !DIExpression()), !dbg !26 - call void @llvm.dbg.value(metadata i8** %1, metadata !25, metadata !DIExpression()), !dbg !27 - %3 = tail call i32 @sub1(i32 %0), !dbg !28 - ret i32 %3, !dbg !29 -} - -declare void @llvm.dbg.value(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "argc_sub1.c", directory: "Inputs/") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{i32 7, !"PIC Level", i32 2} -!7 = !{!"clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"} -!8 = distinct !DISubprogram(name: "sub1", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) -!9 = !DISubroutineType(types: !10) -!10 = !{!11, !11} -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !{!13} -!13 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 1, type: !11) -!14 = !DILocation(line: 1, column: 14, scope: !8) -!15 = !DILocation(line: 1, column: 28, scope: !8) -!16 = !DILocation(line: 1, column: 19, scope: !8) -!17 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !18, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !23) -!18 = !DISubroutineType(types: !19) -!19 = !{!11, !11, !20} -!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64) -!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64) -!22 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!23 = !{!24, !25} -!24 = !DILocalVariable(name: "argc", arg: 1, scope: !17, file: !1, line: 2, type: !11) -!25 = !DILocalVariable(name: "argv", arg: 2, scope: !17, file: !1, line: 2, type: !20) -!26 = !DILocation(line: 2, column: 14, scope: !17) -!27 = !DILocation(line: 2, column: 27, scope: !17) -!28 = !DILocation(line: 2, column: 42, scope: !17) -!29 = !DILocation(line: 2, column: 35, scope: !17) diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test index b0b3350..83dbf62 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test @@ -1,15 +1,21 @@ -# This test makes sure that the example builds and executes as expected. +# Check that the debug support plugin appends a jit_code_entry to the +# jit_descriptor of the child process. + # Instructions for debugging can be found in LLJITWithRemoteDebugging.cpp # REQUIRES: default_triple # UNSUPPORTED: target=powerpc64{{.*}} -# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll | FileCheck --check-prefix=CHECK0 %s -# CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll +# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1.ll 2>&1 | FileCheck --check-prefix=CHECK0 %s +# CHECK0: __jit_debug_descriptor.last_entry = [[BEFORE0:0x[0-9a-f]+]] +# CHECK0-NOT: __jit_debug_descriptor.last_entry = [[BEFORE0]] +# CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1.ll # CHECK0: Running: main() # CHECK0: Exit code: 0 -# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll --args 2nd 3rd 4th | FileCheck --check-prefix=CHECK3 %s -# CHECK3: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll +# RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1.ll --args 2nd 3rd 4th 2>&1 | FileCheck --check-prefix=CHECK3 %s +# CHECK3: __jit_debug_descriptor.last_entry = [[BEFORE3:0x[0-9a-f]+]] +# CHECK3-NOT: __jit_debug_descriptor.last_entry = [[BEFORE3]] +# CHECK3: Parsing input IR code from: {{.*}}/Inputs/argc_sub1.ll # CHECK3: Running: main("2nd", "3rd", "4th") # CHECK3: Exit code: 3 diff --git a/llvm/test/MC/AArch64/FP8/dot.s b/llvm/test/MC/AArch64/FP8/dot.s index e755430..cfcc113 100644 --- a/llvm/test/MC/AArch64/FP8/dot.s +++ b/llvm/test/MC/AArch64/FP8/dot.s @@ -44,6 +44,12 @@ fdot v31.4h, v31.8b, v15.2b[0] // CHECK-ERROR: instruction requires: fp8dot2 // CHECK-UNKNOWN: 0f4f03ff <unknown> +fdot v26.8H, v22.16B, v9.2B[0] +// CHECK-INST: fdot v26.8h, v22.16b, v9.2b[0] +// CHECK-ENCODING: [0xda,0x02,0x49,0x4f] +// CHECK-ERROR: instruction requires: fp8dot2 +// CHECK-UNKNOWN: 4f4902da <unknown> + fdot v0.8h, v0.16b, v15.2b[7] // CHECK-INST: fdot v0.8h, v0.16b, v15.2b[7] // CHECK-ENCODING: [0x00,0x08,0x7f,0x4f] diff --git a/llvm/test/MC/AArch64/SME2p1/bfadd-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfadd-diagnostics.s index bc9a112..cadb470 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfadd-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfadd-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Out of range index offset diff --git a/llvm/test/MC/AArch64/SME2p1/bfadd.s b/llvm/test/MC/AArch64/SME2/bfadd.s index 0fb553d..0ba009a 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfadd.s +++ b/llvm/test/MC/AArch64/SME2/bfadd.s @@ -1,300 +1,300 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfadd za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00000000 // CHECK-INST: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x00,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c00 <unknown> bfadd za.h[w8, 0], {z0.h - z1.h} // 11000001-11100100-00011100-00000000 // CHECK-INST: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x00,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c00 <unknown> bfadd za.h[w10, 5, vgx2], {z10.h, z11.h} // 11000001-11100100-01011101-01000101 // CHECK-INST: bfadd za.h[w10, 5, vgx2], { z10.h, z11.h } // CHECK-ENCODING: [0x45,0x5d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45d45 <unknown> bfadd za.h[w10, 5], {z10.h - z11.h} // 11000001-11100100-01011101-01000101 // CHECK-INST: bfadd za.h[w10, 5, vgx2], { z10.h, z11.h } // CHECK-ENCODING: [0x45,0x5d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45d45 <unknown> bfadd za.h[w11, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-01111101-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x87,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d87 <unknown> bfadd za.h[w11, 7], {z12.h - z13.h} // 11000001-11100100-01111101-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x87,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d87 <unknown> bfadd za.h[w11, 7, vgx2], {z30.h, z31.h} // 11000001-11100100-01111111-11000111 // CHECK-INST: bfadd za.h[w11, 7, vgx2], { z30.h, z31.h } // CHECK-ENCODING: [0xc7,0x7f,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47fc7 <unknown> bfadd za.h[w11, 7], {z30.h - z31.h} // 11000001-11100100-01111111-11000111 // CHECK-INST: bfadd za.h[w11, 7, vgx2], { z30.h, z31.h } // CHECK-ENCODING: [0xc7,0x7f,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47fc7 <unknown> bfadd za.h[w8, 5, vgx2], {z16.h, z17.h} // 11000001-11100100-00011110-00000101 // CHECK-INST: bfadd za.h[w8, 5, vgx2], { z16.h, z17.h } // CHECK-ENCODING: [0x05,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41e05 <unknown> bfadd za.h[w8, 5], {z16.h - z17.h} // 11000001-11100100-00011110-00000101 // CHECK-INST: bfadd za.h[w8, 5, vgx2], { z16.h, z17.h } // CHECK-ENCODING: [0x05,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41e05 <unknown> bfadd za.h[w8, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00000001 // CHECK-INST: bfadd za.h[w8, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x01,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c01 <unknown> bfadd za.h[w8, 1], {z0.h - z1.h} // 11000001-11100100-00011100-00000001 // CHECK-INST: bfadd za.h[w8, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x01,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c01 <unknown> bfadd za.h[w10, 0, vgx2], {z18.h, z19.h} // 11000001-11100100-01011110, 01000000 // CHECK-INST: bfadd za.h[w10, 0, vgx2], { z18.h, z19.h } // CHECK-ENCODING: [0x40,0x5e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45e40 <unknown> bfadd za.h[w10, 0], {z18.h - z19.h} // 11000001-11100100-01011110-01000000 // CHECK-INST: bfadd za.h[w10, 0, vgx2], { z18.h, z19.h } // CHECK-ENCODING: [0x40,0x5e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45e40 <unknown> bfadd za.h[w8, 0, vgx2], {z12.h, z13.h} // 11000001-11100100-00011101-10000000 // CHECK-INST: bfadd za.h[w8, 0, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x80,0x1d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41d80 <unknown> bfadd za.h[w8, 0], {z12.h - z13.h} // 11000001-11100100-00011101-10000000 // CHECK-INST: bfadd za.h[w8, 0, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x80,0x1d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41d80 <unknown> bfadd za.h[w10, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-01011100-00000001 // CHECK-INST: bfadd za.h[w10, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x01,0x5c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45c01 <unknown> bfadd za.h[w10, 1], {z0.h - z1.h} // 11000001-11100100-01011100-00000001 // CHECK-INST: bfadd za.h[w10, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x01,0x5c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45c01 <unknown> bfadd za.h[w8, 5, vgx2], {z22.h, z23.h} // 11000001-11100100-00011110, 11000101 // CHECK-INST: bfadd za.h[w8, 5, vgx2], { z22.h, z23.h } // CHECK-ENCODING: [0xc5,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41ec5 <unknown> bfadd za.h[w8, 5], {z22.h - z23.h} // 11000001-11100100-00011110-11000101 // CHECK-INST: bfadd za.h[w8, 5, vgx2], { z22.h, z23.h } // CHECK-ENCODING: [0xc5,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41ec5 <unknown> bfadd za.h[w11, 2, vgx2], {z8.h, z9.h} // 11000001-11100100-01111101-00000010 // CHECK-INST: bfadd za.h[w11, 2, vgx2], { z8.h, z9.h } // CHECK-ENCODING: [0x02,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d02 <unknown> bfadd za.h[w11, 2], {z8.h - z9.h} // 11000001-11100100-01111101-00000010 // CHECK-INST: bfadd za.h[w11, 2, vgx2], { z8.h, z9.h } // CHECK-ENCODING: [0x02,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d02 <unknown> bfadd za.h[w9, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-00111101-10000111 // CHECK-INST: bfadd za.h[w9, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x87,0x3d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e43d87 <unknown> bfadd za.h[w9, 7], {z12.h - z13.h} // 11000001-11100100-00111101-10000111 // CHECK-INST: bfadd za.h[w9, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x87,0x3d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e43d87 <unknown> bfadd za.h[w8, 0, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00000000 // CHECK-INST: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x00,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c00 <unknown> bfadd za.h[w8, 0], {z0.h - z3.h} // 11000001-11100101-00011100-00000000 // CHECK-INST: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x00,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c00 <unknown> bfadd za.h[w10, 5, vgx4], {z8.h - z11.h} // 11000001-11100101-01011101-00000101 // CHECK-INST: bfadd za.h[w10, 5, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x05,0x5d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55d05 <unknown> bfadd za.h[w10, 5], {z8.h - z11.h} // 11000001-11100101-01011101-00000101 // CHECK-INST: bfadd za.h[w10, 5, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x05,0x5d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55d05 <unknown> bfadd za.h[w11, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-01111101-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x87,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d87 <unknown> bfadd za.h[w11, 7], {z12.h - z15.h} // 11000001-11100101-01111101-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x87,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d87 <unknown> bfadd za.h[w11, 7, vgx4], {z28.h - z31.h} // 11000001-11100101-01111111-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx4], { z28.h - z31.h } // CHECK-ENCODING: [0x87,0x7f,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57f87 <unknown> bfadd za.h[w11, 7], {z28.h - z31.h} // 11000001-11100101-01111111-10000111 // CHECK-INST: bfadd za.h[w11, 7, vgx4], { z28.h - z31.h } // CHECK-ENCODING: [0x87,0x7f,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57f87 <unknown> bfadd za.h[w8, 5, vgx4], {z16.h - z19.h} // 11000001-11100101-00011110-00000101 // CHECK-INST: bfadd za.h[w8, 5, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x05,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e05 <unknown> bfadd za.h[w8, 5], {z16.h - z19.h} // 11000001-11100101-00011110-00000101 // CHECK-INST: bfadd za.h[w8, 5, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x05,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e05 <unknown> bfadd za.h[w8, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00000001 // CHECK-INST: bfadd za.h[w8, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x01,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c01 <unknown> bfadd za.h[w8, 1], {z0.h - z3.h} // 11000001-11100101-00011100-00000001 // CHECK-INST: bfadd za.h[w8, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x01,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c01 <unknown> bfadd za.h[w10, 0, vgx4], {z16.h - z19.h} // 11000001-11100101-01011110-00000000 // CHECK-INST: bfadd za.h[w10, 0, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x00,0x5e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55e00 <unknown> bfadd za.h[w10, 0], {z16.h - z19.h} // 11000001-11100101-01011110-00000000 // CHECK-INST: bfadd za.h[w10, 0, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x00,0x5e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55e00 <unknown> bfadd za.h[w8, 0, vgx4], {z12.h - z15.h} // 11000001-11100101-00011101-10000000 // CHECK-INST: bfadd za.h[w8, 0, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x80,0x1d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51d80 <unknown> bfadd za.h[w8, 0], {z12.h - z15.h} // 11000001-11100101-00011101-10000000 // CHECK-INST: bfadd za.h[w8, 0, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x80,0x1d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51d80 <unknown> bfadd za.h[w10, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-01011100-00000001 // CHECK-INST: bfadd za.h[w10, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x01,0x5c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55c01 <unknown> bfadd za.h[w10, 1], {z0.h - z3.h} // 11000001-11100101-01011100-00000001 // CHECK-INST: bfadd za.h[w10, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x01,0x5c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55c01 <unknown> bfadd za.h[w8, 5, vgx4], {z20.h - z23.h} // 11000001-11100101-00011110-10000101 // CHECK-INST: bfadd za.h[w8, 5, vgx4], { z20.h - z23.h } // CHECK-ENCODING: [0x85,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e85 <unknown> bfadd za.h[w8, 5], {z20.h - z23.h} // 11000001-11100101-00011110-10000101 // CHECK-INST: bfadd za.h[w8, 5, vgx4], { z20.h - z23.h } // CHECK-ENCODING: [0x85,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e85 <unknown> bfadd za.h[w11, 2, vgx4], {z8.h - z11.h} // 11000001-11100101-01111101-00000010 // CHECK-INST: bfadd za.h[w11, 2, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x02,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d02 <unknown> bfadd za.h[w11, 2], {z8.h - z11.h} // 11000001-11100101-01111101-00000010 // CHECK-INST: bfadd za.h[w11, 2, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x02,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d02 <unknown> bfadd za.h[w9, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-00111101-10000111 // CHECK-INST: bfadd za.h[w9, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x87,0x3d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e53d87 <unknown> bfadd za.h[w9, 7], {z12.h - z15.h} // 11000001-11100101-00111101-10000111 // CHECK-INST: bfadd za.h[w9, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x87,0x3d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e53d87 <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfclamp-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfclamp-diagnostics.s index ee48a21..661cfad 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfclamp-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfclamp-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfclamp.s b/llvm/test/MC/AArch64/SME2/bfclamp.s index ebf8500..dc3caec 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfclamp.s +++ b/llvm/test/MC/AArch64/SME2/bfclamp.s @@ -1,60 +1,60 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfclamp {z0.h, z1.h}, z0.h, z0.h // 11000001-00100000-11000000-00000000 // CHECK-INST: bfclamp { z0.h, z1.h }, z0.h, z0.h // CHECK-ENCODING: [0x00,0xc0,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120c000 <unknown> bfclamp {z20.h, z21.h}, z10.h, z21.h // 11000001-00110101-11000001-01010100 // CHECK-INST: bfclamp { z20.h, z21.h }, z10.h, z21.h // CHECK-ENCODING: [0x54,0xc1,0x35,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c135c154 <unknown> bfclamp {z22.h, z23.h}, z13.h, z8.h // 11000001-00101000-11000001-10110110 // CHECK-INST: bfclamp { z22.h, z23.h }, z13.h, z8.h // CHECK-ENCODING: [0xb6,0xc1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128c1b6 <unknown> bfclamp {z30.h, z31.h}, z31.h, z31.h // 11000001-00111111-11000011-11111110 // CHECK-INST: bfclamp { z30.h, z31.h }, z31.h, z31.h // CHECK-ENCODING: [0xfe,0xc3,0x3f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13fc3fe <unknown> bfclamp {z0.h - z3.h}, z0.h, z0.h // 11000001-00100000-11001000-00000000 // CHECK-INST: bfclamp { z0.h - z3.h }, z0.h, z0.h // CHECK-ENCODING: [0x00,0xc8,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120c800 <unknown> bfclamp {z20.h - z23.h}, z10.h, z21.h // 11000001-00110101-11001001-01010100 // CHECK-INST: bfclamp { z20.h - z23.h }, z10.h, z21.h // CHECK-ENCODING: [0x54,0xc9,0x35,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c135c954 <unknown> bfclamp {z20.h - z23.h}, z13.h, z8.h // 11000001-00101000-11001001-10110100 // CHECK-INST: bfclamp { z20.h - z23.h }, z13.h, z8.h // CHECK-ENCODING: [0xb4,0xc9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128c9b4 <unknown> bfclamp {z28.h - z31.h}, z31.h, z31.h // 11000001-00111111-11001011-11111100 // CHECK-INST: bfclamp { z28.h - z31.h }, z31.h, z31.h // CHECK-ENCODING: [0xfc,0xcb,0x3f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13fcbfc <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmax-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmax-diagnostics.s index 55e5755..bbb619e 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmax-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmax-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfmax.s b/llvm/test/MC/AArch64/SME2/bfmax.s index d5af905..657fcbc 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmax.s +++ b/llvm/test/MC/AArch64/SME2/bfmax.s @@ -1,108 +1,108 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmax {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00000000 // CHECK-INST: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x00,0xa1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a100 <unknown> bfmax {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00010100 // CHECK-INST: bfmax { z20.h, z21.h }, { z20.h, z21.h }, z5.h // CHECK-ENCODING: [0x14,0xa1,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a114 <unknown> bfmax {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00010110 // CHECK-INST: bfmax { z22.h, z23.h }, { z22.h, z23.h }, z8.h // CHECK-ENCODING: [0x16,0xa1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a116 <unknown> bfmax {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00011110 // CHECK-INST: bfmax { z30.h, z31.h }, { z30.h, z31.h }, z15.h // CHECK-ENCODING: [0x1e,0xa1,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa11e <unknown> bfmax {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00000000 // CHECK-INST: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x00,0xb1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b100 <unknown> bfmax {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00010100 // CHECK-INST: bfmax { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x14,0xb1,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b114 <unknown> bfmax {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00010110 // CHECK-INST: bfmax { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x16,0xb1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b116 <unknown> bfmax {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00011110 // CHECK-INST: bfmax { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x1e,0xb1,0x3e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13eb11e <unknown> bfmax {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00000000 // CHECK-INST: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x00,0xa9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a900 <unknown> bfmax {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00010100 // CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, z5.h // CHECK-ENCODING: [0x14,0xa9,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a914 <unknown> bfmax {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00010100 // CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, z8.h // CHECK-ENCODING: [0x14,0xa9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a914 <unknown> bfmax {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00011100 // CHECK-INST: bfmax { z28.h - z31.h }, { z28.h - z31.h }, z15.h // CHECK-ENCODING: [0x1c,0xa9,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa91c <unknown> bfmax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00000000 // CHECK-INST: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x00,0xb9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b900 <unknown> bfmax {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00010100 // CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x14,0xb9,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b914 <unknown> bfmax {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00010100 // CHECK-INST: bfmax { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x14,0xb9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b914 <unknown> bfmax {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00011100 // CHECK-INST: bfmax { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x1c,0xb9,0x3c,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13cb91c <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmaxnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmaxnm-diagnostics.s index b1f1511..ab837b6 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmaxnm-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmaxnm-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfmaxnm.s b/llvm/test/MC/AArch64/SME2/bfmaxnm.s index e5b97e8..f61f530 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmaxnm.s +++ b/llvm/test/MC/AArch64/SME2/bfmaxnm.s @@ -1,108 +1,108 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmaxnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00100000 // CHECK-INST: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x20,0xa1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a120 <unknown> bfmaxnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00110100 // CHECK-INST: bfmaxnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h // CHECK-ENCODING: [0x34,0xa1,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a134 <unknown> bfmaxnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00110110 // CHECK-INST: bfmaxnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h // CHECK-ENCODING: [0x36,0xa1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a136 <unknown> bfmaxnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00111110 // CHECK-INST: bfmaxnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h // CHECK-ENCODING: [0x3e,0xa1,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa13e <unknown> bfmaxnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00100000 // CHECK-INST: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x20,0xb1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b120 <unknown> bfmaxnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00110100 // CHECK-INST: bfmaxnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x34,0xb1,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b134 <unknown> bfmaxnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00110110 // CHECK-INST: bfmaxnm { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x36,0xb1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b136 <unknown> bfmaxnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00111110 // CHECK-INST: bfmaxnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x3e,0xb1,0x3e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13eb13e <unknown> bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00100000 // CHECK-INST: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x20,0xa9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a920 <unknown> bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00110100 // CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h // CHECK-ENCODING: [0x34,0xa9,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a934 <unknown> bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00110100 // CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h // CHECK-ENCODING: [0x34,0xa9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a934 <unknown> bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00111100 // CHECK-INST: bfmaxnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h // CHECK-ENCODING: [0x3c,0xa9,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa93c <unknown> bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00100000 // CHECK-INST: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x20,0xb9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b920 <unknown> bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00110100 // CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x34,0xb9,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b934 <unknown> bfmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00110100 // CHECK-INST: bfmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x34,0xb9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b934 <unknown> bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00111100 // CHECK-INST: bfmaxnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x3c,0xb9,0x3c,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13cb93c <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmin-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmin-diagnostics.s index 72ee818..41f1036 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmin-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmin-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfmin.s b/llvm/test/MC/AArch64/SME2/bfmin.s index 3ba2be5..6612e3c 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmin.s +++ b/llvm/test/MC/AArch64/SME2/bfmin.s @@ -1,108 +1,108 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmin {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00000001 // CHECK-INST: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x01,0xa1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a101 <unknown> bfmin {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00010101 // CHECK-INST: bfmin { z20.h, z21.h }, { z20.h, z21.h }, z5.h // CHECK-ENCODING: [0x15,0xa1,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a115 <unknown> bfmin {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00010111 // CHECK-INST: bfmin { z22.h, z23.h }, { z22.h, z23.h }, z8.h // CHECK-ENCODING: [0x17,0xa1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a117 <unknown> bfmin {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00011111 // CHECK-INST: bfmin { z30.h, z31.h }, { z30.h, z31.h }, z15.h // CHECK-ENCODING: [0x1f,0xa1,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa11f <unknown> bfmin {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00000001 // CHECK-INST: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x01,0xb1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b101 <unknown> bfmin {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00010101 // CHECK-INST: bfmin { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x15,0xb1,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b115 <unknown> bfmin {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00010111 // CHECK-INST: bfmin { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x17,0xb1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b117 <unknown> bfmin {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00011111 // CHECK-INST: bfmin { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x1f,0xb1,0x3e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13eb11f <unknown> bfmin {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00000001 // CHECK-INST: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x01,0xa9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a901 <unknown> bfmin {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00010101 // CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, z5.h // CHECK-ENCODING: [0x15,0xa9,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a915 <unknown> bfmin {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00010101 // CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, z8.h // CHECK-ENCODING: [0x15,0xa9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a915 <unknown> bfmin {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00011101 // CHECK-INST: bfmin { z28.h - z31.h }, { z28.h - z31.h }, z15.h // CHECK-ENCODING: [0x1d,0xa9,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa91d <unknown> bfmin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00000001 // CHECK-INST: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x01,0xb9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b901 <unknown> bfmin {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00010101 // CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x15,0xb9,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b915 <unknown> bfmin {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00010101 // CHECK-INST: bfmin { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x15,0xb9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b915 <unknown> bfmin {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00011101 // CHECK-INST: bfmin { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x1d,0xb9,0x3c,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13cb91d <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfminnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfminnm-diagnostics.s index 2d161bf..14485e9 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfminnm-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfminnm-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfminnm.s b/llvm/test/MC/AArch64/SME2/bfminnm.s index cfaa3c1..4a48a0d 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfminnm.s +++ b/llvm/test/MC/AArch64/SME2/bfminnm.s @@ -1,113 +1,113 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfminnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-00100000-10100001-00100001 // CHECK-INST: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x21,0xa1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a121 <unknown> bfminnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-00100101-10100001-00110101 // CHECK-INST: bfminnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h // CHECK-ENCODING: [0x35,0xa1,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a135 <unknown> bfminnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-00101000-10100001-00110111 // CHECK-INST: bfminnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h // CHECK-ENCODING: [0x37,0xa1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a137 <unknown> bfminnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-00101111-10100001-00111111 // CHECK-INST: bfminnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h // CHECK-ENCODING: [0x3f,0xa1,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa13f <unknown> bfminnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-00100000-10110001-00100001 // CHECK-INST: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x21,0xb1,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b121 <unknown> bfminnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-00110100-10110001-00110101 // CHECK-INST: bfminnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x35,0xb1,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b135 <unknown> bfminnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-00101000-10110001-00110111 // CHECK-INST: bfminnm { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x37,0xb1,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b137 <unknown> bfminnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-00111110-10110001-00111111 // CHECK-INST: bfminnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x3f,0xb1,0x3e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13eb13f <unknown> bfminnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-00100000-10101001-00100001 // CHECK-INST: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x21,0xa9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120a921 <unknown> bfminnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-00100101-10101001-00110101 // CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h // CHECK-ENCODING: [0x35,0xa9,0x25,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c125a935 <unknown> bfminnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-00101000-10101001-00110101 // CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h // CHECK-ENCODING: [0x35,0xa9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128a935 <unknown> bfminnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-00101111-10101001-00111101 // CHECK-INST: bfminnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h // CHECK-ENCODING: [0x3d,0xa9,0x2f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c12fa93d <unknown> bfminnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-00100000-10111001-00100001 // CHECK-INST: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x21,0xb9,0x20,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c120b921 <unknown> bfminnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-00110100-10111001-00110101 // CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x35,0xb9,0x34,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c134b935 <unknown> bfminnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-00101000-10111001-00110101 // CHECK-INST: bfminnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x35,0xb9,0x28,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c128b935 <unknown> bfminnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-00111100-10111001-00111101 // CHECK-INST: bfminnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x3d,0xb9,0x3c,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c13cb93d <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmla-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmla-diagnostics.s index 42bb35d..efd2624 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmla-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmla-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfmla.s b/llvm/test/MC/AArch64/SME2/bfmla.s index 4c053fe..75ccccc 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmla.s +++ b/llvm/test/MC/AArch64/SME2/bfmla.s @@ -1,876 +1,876 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00011100-00000000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x00,0x1c,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601c00 <unknown> bfmla za.h[w8, 0], {z0.h - z1.h}, z0.h // 11000001-01100000-00011100-00000000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x00,0x1c,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601c00 <unknown> bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01011101-01000101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h // CHECK-ENCODING: [0x45,0x5d,0x65,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1655d45 <unknown> bfmla za.h[w10, 5], {z10.h - z11.h}, z5.h // 11000001-01100101-01011101-01000101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h // CHECK-ENCODING: [0x45,0x5d,0x65,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1655d45 <unknown> bfmla za.h[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h // CHECK-ENCODING: [0xa7,0x7d,0x68,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1687da7 <unknown> bfmla za.h[w11, 7], {z13.h - z14.h}, z8.h // 11000001-01101000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h // CHECK-ENCODING: [0xa7,0x7d,0x68,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1687da7 <unknown> bfmla za.h[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01111111-11100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h // CHECK-ENCODING: [0xe7,0x7f,0x6f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16f7fe7 <unknown> bfmla za.h[w11, 7], {z31.h - z0.h}, z15.h // 11000001-01101111-01111111-11100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h // CHECK-ENCODING: [0xe7,0x7f,0x6f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16f7fe7 <unknown> bfmla za.h[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h // CHECK-ENCODING: [0x25,0x1e,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601e25 <unknown> bfmla za.h[w8, 5], {z17.h - z18.h}, z0.h // 11000001-01100000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h // CHECK-ENCODING: [0x25,0x1e,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601e25 <unknown> bfmla za.h[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00011100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h // CHECK-ENCODING: [0x21,0x1c,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1c21 <unknown> bfmla za.h[w8, 1], {z1.h - z2.h}, z14.h // 11000001-01101110-00011100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h // CHECK-ENCODING: [0x21,0x1c,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1c21 <unknown> bfmla za.h[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01011110-01100000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h // CHECK-ENCODING: [0x60,0x5e,0x64,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1645e60 <unknown> bfmla za.h[w10, 0], {z19.h - z20.h}, z4.h // 11000001-01100100-01011110-01100000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h // CHECK-ENCODING: [0x60,0x5e,0x64,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1645e60 <unknown> bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00011101-10000000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h // CHECK-ENCODING: [0x80,0x1d,0x62,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1621d80 <unknown> bfmla za.h[w8, 0], {z12.h - z13.h}, z2.h // 11000001-01100010-00011101-10000000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h // CHECK-ENCODING: [0x80,0x1d,0x62,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1621d80 <unknown> bfmla za.h[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01011100-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h // CHECK-ENCODING: [0x21,0x5c,0x6a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16a5c21 <unknown> bfmla za.h[w10, 1], {z1.h - z2.h}, z10.h // 11000001-01101010-01011100-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h // CHECK-ENCODING: [0x21,0x5c,0x6a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16a5c21 <unknown> bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00011110-11000101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h // CHECK-ENCODING: [0xc5,0x1e,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1ec5 <unknown> bfmla za.h[w8, 5], {z22.h - z23.h}, z14.h // 11000001-01101110-00011110-11000101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h // CHECK-ENCODING: [0xc5,0x1e,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1ec5 <unknown> bfmla za.h[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01111101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h // CHECK-ENCODING: [0x22,0x7d,0x61,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1617d22 <unknown> bfmla za.h[w11, 2], {z9.h - z10.h}, z1.h // 11000001-01100001-01111101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h // CHECK-ENCODING: [0x22,0x7d,0x61,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1617d22 <unknown> bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00111101-10000111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h // CHECK-ENCODING: [0x87,0x3d,0x6b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16b3d87 <unknown> bfmla za.h[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00111101-10000111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h // CHECK-ENCODING: [0x87,0x3d,0x6b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16b3d87 <unknown> bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-00010000-00010000-00100000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] // CHECK-ENCODING: [0x20,0x10,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101020 <unknown> bfmla za.h[w8, 0], {z0.h - z1.h}, z0.h[0] // 11000001-00010000-00010000-00100000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] // CHECK-ENCODING: [0x20,0x10,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101020 <unknown> bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h[2] // 11000001-00010101-01010101-01100101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2] // CHECK-ENCODING: [0x65,0x55,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1155565 <unknown> bfmla za.h[w10, 5], {z10.h - z11.h}, z5.h[2] // 11000001-00010101-01010101-01100101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2] // CHECK-ENCODING: [0x65,0x55,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1155565 <unknown> bfmla za.h[w11, 7, vgx2], {z12.h, z13.h}, z8.h[6] // 11000001-00011000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6] // CHECK-ENCODING: [0xa7,0x7d,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1187da7 <unknown> bfmla za.h[w11, 7], {z12.h - z13.h}, z8.h[6] // 11000001-00011000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6] // CHECK-ENCODING: [0xa7,0x7d,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1187da7 <unknown> bfmla za.h[w11, 7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001-00011111-01111111-11101111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7] // CHECK-ENCODING: [0xef,0x7f,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11f7fef <unknown> bfmla za.h[w11, 7], {z30.h - z31.h}, z15.h[7] // 11000001-00011111-01111111-11101111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7] // CHECK-ENCODING: [0xef,0x7f,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11f7fef <unknown> bfmla za.h[w8, 5, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001-00010000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6] // CHECK-ENCODING: [0x25,0x1e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101e25 <unknown> bfmla za.h[w8, 5], {z16.h - z17.h}, z0.h[6] // 11000001-00010000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6] // CHECK-ENCODING: [0x25,0x1e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101e25 <unknown> bfmla za.h[w8, 1, vgx2], {z0.h, z1.h}, z14.h[2] // 11000001-00011110-00010100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2] // CHECK-ENCODING: [0x21,0x14,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1421 <unknown> bfmla za.h[w8, 1], {z0.h - z1.h}, z14.h[2] // 11000001-00011110-00010100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2] // CHECK-ENCODING: [0x21,0x14,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1421 <unknown> bfmla za.h[w10, 0, vgx2], {z18.h, z19.h}, z4.h[3] // 11000001-00010100-01010110-01101000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3] // CHECK-ENCODING: [0x68,0x56,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1145668 <unknown> bfmla za.h[w10, 0], {z18.h - z19.h}, z4.h[3] // 11000001-00010100-01010110-01101000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3] // CHECK-ENCODING: [0x68,0x56,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1145668 <unknown> bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h[4] // 11000001-00010010-00011001-10100000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4] // CHECK-ENCODING: [0xa0,0x19,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11219a0 <unknown> bfmla za.h[w8, 0], {z12.h - z13.h}, z2.h[4] // 11000001-00010010-00011001-10100000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4] // CHECK-ENCODING: [0xa0,0x19,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11219a0 <unknown> bfmla za.h[w10, 1, vgx2], {z0.h, z1.h}, z10.h[4] // 11000001-00011010-01011000-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4] // CHECK-ENCODING: [0x21,0x58,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11a5821 <unknown> bfmla za.h[w10, 1], {z0.h - z1.h}, z10.h[4] // 11000001-00011010-01011000-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4] // CHECK-ENCODING: [0x21,0x58,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11a5821 <unknown> bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h[5] // 11000001-00011110-00011010-11101101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5] // CHECK-ENCODING: [0xed,0x1a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1aed <unknown> bfmla za.h[w8, 5], {z22.h - z23.h}, z14.h[5] // 11000001-00011110-00011010-11101101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5] // CHECK-ENCODING: [0xed,0x1a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1aed <unknown> bfmla za.h[w11, 2, vgx2], {z8.h, z9.h}, z1.h[2] // 11000001-00010001-01110101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2] // CHECK-ENCODING: [0x22,0x75,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1117522 <unknown> bfmla za.h[w11, 2], {z8.h - z9.h}, z1.h[2] // 11000001-00010001-01110101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2] // CHECK-ENCODING: [0x22,0x75,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1117522 <unknown> bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h[4] // 11000001-00011011-00111001-10100111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4] // CHECK-ENCODING: [0xa7,0x39,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11b39a7 <unknown> bfmla za.h[w9, 7], {z12.h - z13.h}, z11.h[4] // 11000001-00011011-00111001-10100111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4] // CHECK-ENCODING: [0xa7,0x39,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11b39a7 <unknown> bfmla za.h[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000-00010000-00001000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x08,0x10,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e01008 <unknown> bfmla za.h[w8, 0], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00010000-00001000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x08,0x10,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e01008 <unknown> bfmla za.h[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100-01010001-01001101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x4d,0x51,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f4514d <unknown> bfmla za.h[w10, 5], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01010001-01001101 // CHECK-INST: bfmla za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x4d,0x51,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f4514d <unknown> bfmla za.h[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000-01110001-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x8f,0x71,0xe8,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e8718f <unknown> bfmla za.h[w11, 7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01110001-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x8f,0x71,0xe8,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e8718f <unknown> bfmla za.h[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110-01110011-11001111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xcf,0x73,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe73cf <unknown> bfmla za.h[w11, 7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01110011-11001111 // CHECK-INST: bfmla za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xcf,0x73,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe73cf <unknown> bfmla za.h[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000-00010010-00001101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } // CHECK-ENCODING: [0x0d,0x12,0xf0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f0120d <unknown> bfmla za.h[w8, 5], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00010010-00001101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } // CHECK-ENCODING: [0x0d,0x12,0xf0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f0120d <unknown> bfmla za.h[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110-00010000-00001001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x09,0x10,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe1009 <unknown> bfmla za.h[w8, 1], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00010000-00001001 // CHECK-INST: bfmla za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x09,0x10,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe1009 <unknown> bfmla za.h[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100-01010010-01001000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x48,0x52,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f45248 <unknown> bfmla za.h[w10, 0], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01010010-01001000 // CHECK-INST: bfmla za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x48,0x52,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f45248 <unknown> bfmla za.h[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010-00010001-10001000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } // CHECK-ENCODING: [0x88,0x11,0xe2,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e21188 <unknown> bfmla za.h[w8, 0], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00010001-10001000 // CHECK-INST: bfmla za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } // CHECK-ENCODING: [0x88,0x11,0xe2,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e21188 <unknown> bfmla za.h[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010-01010000-00001001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } // CHECK-ENCODING: [0x09,0x50,0xfa,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fa5009 <unknown> bfmla za.h[w10, 1], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01010000-00001001 // CHECK-INST: bfmla za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } // CHECK-ENCODING: [0x09,0x50,0xfa,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fa5009 <unknown> bfmla za.h[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110-00010010-11001101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xcd,0x12,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe12cd <unknown> bfmla za.h[w8, 5], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00010010-11001101 // CHECK-INST: bfmla za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xcd,0x12,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe12cd <unknown> bfmla za.h[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000-01110001-00001010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x0a,0x71,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e0710a <unknown> bfmla za.h[w11, 2], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01110001-00001010 // CHECK-INST: bfmla za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x0a,0x71,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e0710a <unknown> bfmla za.h[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010-00110001-10001111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } // CHECK-ENCODING: [0x8f,0x31,0xea,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1ea318f <unknown> bfmla za.h[w9, 7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00110001-10001111 // CHECK-INST: bfmla za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } // CHECK-ENCODING: [0x8f,0x31,0xea,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1ea318f <unknown> bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00000000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x00,0x1c,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701c00 <unknown> bfmla za.h[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00000000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x00,0x1c,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701c00 <unknown> bfmla za.h[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01000101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h // CHECK-ENCODING: [0x45,0x5d,0x75,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1755d45 <unknown> bfmla za.h[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01000101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h // CHECK-ENCODING: [0x45,0x5d,0x75,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1755d45 <unknown> bfmla za.h[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h // CHECK-ENCODING: [0xa7,0x7d,0x78,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1787da7 <unknown> bfmla za.h[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h // CHECK-ENCODING: [0xa7,0x7d,0x78,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1787da7 <unknown> bfmla za.h[w11, 7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h // CHECK-ENCODING: [0xe7,0x7f,0x7f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17f7fe7 <unknown> bfmla za.h[w11, 7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h // CHECK-ENCODING: [0xe7,0x7f,0x7f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17f7fe7 <unknown> bfmla za.h[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h // CHECK-ENCODING: [0x25,0x1e,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701e25 <unknown> bfmla za.h[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h // CHECK-ENCODING: [0x25,0x1e,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701e25 <unknown> bfmla za.h[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h // CHECK-ENCODING: [0x21,0x1c,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1c21 <unknown> bfmla za.h[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h // CHECK-ENCODING: [0x21,0x1c,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1c21 <unknown> bfmla za.h[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01100000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h // CHECK-ENCODING: [0x60,0x5e,0x74,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1745e60 <unknown> bfmla za.h[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01100000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h // CHECK-ENCODING: [0x60,0x5e,0x74,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1745e60 <unknown> bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10000000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h // CHECK-ENCODING: [0x80,0x1d,0x72,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1721d80 <unknown> bfmla za.h[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10000000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h // CHECK-ENCODING: [0x80,0x1d,0x72,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1721d80 <unknown> bfmla za.h[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h // CHECK-ENCODING: [0x21,0x5c,0x7a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17a5c21 <unknown> bfmla za.h[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h // CHECK-ENCODING: [0x21,0x5c,0x7a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17a5c21 <unknown> bfmla za.h[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11000101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h // CHECK-ENCODING: [0xc5,0x1e,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1ec5 <unknown> bfmla za.h[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11000101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h // CHECK-ENCODING: [0xc5,0x1e,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1ec5 <unknown> bfmla za.h[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h // CHECK-ENCODING: [0x22,0x7d,0x71,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1717d22 <unknown> bfmla za.h[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h // CHECK-ENCODING: [0x22,0x7d,0x71,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1717d22 <unknown> bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10000111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h // CHECK-ENCODING: [0x87,0x3d,0x7b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17b3d87 <unknown> bfmla za.h[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10000111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h // CHECK-ENCODING: [0x87,0x3d,0x7b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17b3d87 <unknown> bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00100000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] // CHECK-ENCODING: [0x20,0x90,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109020 <unknown> bfmla za.h[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00100000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] // CHECK-ENCODING: [0x20,0x90,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109020 <unknown> bfmla za.h[w10, 5, vgx4], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00100101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2] // CHECK-ENCODING: [0x25,0xd5,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c115d525 <unknown> bfmla za.h[w10, 5], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00100101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2] // CHECK-ENCODING: [0x25,0xd5,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c115d525 <unknown> bfmla za.h[w11, 7, vgx4], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6] // CHECK-ENCODING: [0xa7,0xfd,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c118fda7 <unknown> bfmla za.h[w11, 7], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10100111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6] // CHECK-ENCODING: [0xa7,0xfd,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c118fda7 <unknown> bfmla za.h[w11, 7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10101111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7] // CHECK-ENCODING: [0xaf,0xff,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11fffaf <unknown> bfmla za.h[w11, 7], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10101111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7] // CHECK-ENCODING: [0xaf,0xff,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11fffaf <unknown> bfmla za.h[w8, 5, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6] // CHECK-ENCODING: [0x25,0x9e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109e25 <unknown> bfmla za.h[w8, 5], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00100101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6] // CHECK-ENCODING: [0x25,0x9e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109e25 <unknown> bfmla za.h[w8, 1, vgx4], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2] // CHECK-ENCODING: [0x21,0x94,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9421 <unknown> bfmla za.h[w8, 1], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00100001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2] // CHECK-ENCODING: [0x21,0x94,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9421 <unknown> bfmla za.h[w10, 0, vgx4], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00101000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3] // CHECK-ENCODING: [0x28,0xd6,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c114d628 <unknown> bfmla za.h[w10, 0], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00101000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3] // CHECK-ENCODING: [0x28,0xd6,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c114d628 <unknown> bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10100000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4] // CHECK-ENCODING: [0xa0,0x99,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11299a0 <unknown> bfmla za.h[w8, 0], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10100000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4] // CHECK-ENCODING: [0xa0,0x99,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11299a0 <unknown> bfmla za.h[w10, 1, vgx4], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4] // CHECK-ENCODING: [0x21,0xd8,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11ad821 <unknown> bfmla za.h[w10, 1], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00100001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4] // CHECK-ENCODING: [0x21,0xd8,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11ad821 <unknown> bfmla za.h[w8, 5, vgx4], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10101101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5] // CHECK-ENCODING: [0xad,0x9a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9aad <unknown> bfmla za.h[w8, 5], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10101101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5] // CHECK-ENCODING: [0xad,0x9a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9aad <unknown> bfmla za.h[w11, 2, vgx4], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2] // CHECK-ENCODING: [0x22,0xf5,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c111f522 <unknown> bfmla za.h[w11, 2], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00100010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2] // CHECK-ENCODING: [0x22,0xf5,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c111f522 <unknown> bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10100111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4] // CHECK-ENCODING: [0xa7,0xb9,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11bb9a7 <unknown> bfmla za.h[w9, 7], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10100111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4] // CHECK-ENCODING: [0xa7,0xb9,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11bb9a7 <unknown> bfmla za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00001000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x08,0x10,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11008 <unknown> bfmla za.h[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00001000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x08,0x10,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11008 <unknown> bfmla za.h[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00001101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x0d,0x51,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f5510d <unknown> bfmla za.h[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00001101 // CHECK-INST: bfmla za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x0d,0x51,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f5510d <unknown> bfmla za.h[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x8f,0x71,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9718f <unknown> bfmla za.h[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x8f,0x71,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9718f <unknown> bfmla za.h[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x8f,0x73,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd738f <unknown> bfmla za.h[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10001111 // CHECK-INST: bfmla za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x8f,0x73,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd738f <unknown> bfmla za.h[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00001101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } // CHECK-ENCODING: [0x0d,0x12,0xf1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f1120d <unknown> bfmla za.h[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00001101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } // CHECK-ENCODING: [0x0d,0x12,0xf1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f1120d <unknown> bfmla za.h[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00001001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x09,0x10,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd1009 <unknown> bfmla za.h[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00001001 // CHECK-INST: bfmla za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x09,0x10,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd1009 <unknown> bfmla za.h[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00001000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x08,0x52,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f55208 <unknown> bfmla za.h[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00001000 // CHECK-INST: bfmla za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x08,0x52,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f55208 <unknown> bfmla za.h[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10001000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x88,0x11,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11188 <unknown> bfmla za.h[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10001000 // CHECK-INST: bfmla za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x88,0x11,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11188 <unknown> bfmla za.h[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00001001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } // CHECK-ENCODING: [0x09,0x50,0xf9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f95009 <unknown> bfmla za.h[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00001001 // CHECK-INST: bfmla za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } // CHECK-ENCODING: [0x09,0x50,0xf9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f95009 <unknown> bfmla za.h[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10001101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x8d,0x12,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd128d <unknown> bfmla za.h[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10001101 // CHECK-INST: bfmla za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x8d,0x12,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd128d <unknown> bfmla za.h[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00001010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x0a,0x71,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e1710a <unknown> bfmla za.h[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00001010 // CHECK-INST: bfmla za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x0a,0x71,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e1710a <unknown> bfmla za.h[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10001111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x8f,0x31,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9318f <unknown> bfmla za.h[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10001111 // CHECK-INST: bfmla za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x8f,0x31,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9318f <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmls-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmls-diagnostics.s index 4174e24..d6e7713 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmls-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmls-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list diff --git a/llvm/test/MC/AArch64/SME2p1/bfmls.s b/llvm/test/MC/AArch64/SME2/bfmls.s index 631da1e..8d5bdc4 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmls.s +++ b/llvm/test/MC/AArch64/SME2/bfmls.s @@ -1,876 +1,876 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00011100-00001000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x08,0x1c,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601c08 <unknown> bfmls za.h[w8, 0], {z0.h - z1.h}, z0.h // 11000001-01100000-00011100-00001000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h // CHECK-ENCODING: [0x08,0x1c,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601c08 <unknown> bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01011101-01001101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h // CHECK-ENCODING: [0x4d,0x5d,0x65,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1655d4d <unknown> bfmls za.h[w10, 5], {z10.h - z11.h}, z5.h // 11000001-01100101-01011101-01001101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h // CHECK-ENCODING: [0x4d,0x5d,0x65,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1655d4d <unknown> bfmls za.h[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01111101-10101111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h // CHECK-ENCODING: [0xaf,0x7d,0x68,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1687daf <unknown> bfmls za.h[w11, 7], {z13.h - z14.h}, z8.h // 11000001-01101000-01111101-10101111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z13.h, z14.h }, z8.h // CHECK-ENCODING: [0xaf,0x7d,0x68,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1687daf <unknown> bfmls za.h[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01111111-11101111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h // CHECK-ENCODING: [0xef,0x7f,0x6f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16f7fef <unknown> bfmls za.h[w11, 7], {z31.h - z0.h}, z15.h // 11000001-01101111-01111111-11101111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z31.h, z0.h }, z15.h // CHECK-ENCODING: [0xef,0x7f,0x6f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16f7fef <unknown> bfmls za.h[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00011110-00101101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h // CHECK-ENCODING: [0x2d,0x1e,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601e2d <unknown> bfmls za.h[w8, 5], {z17.h - z18.h}, z0.h // 11000001-01100000-00011110-00101101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z17.h, z18.h }, z0.h // CHECK-ENCODING: [0x2d,0x1e,0x60,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1601e2d <unknown> bfmls za.h[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00011100-00101001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h // CHECK-ENCODING: [0x29,0x1c,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1c29 <unknown> bfmls za.h[w8, 1], {z1.h - z2.h}, z14.h // 11000001-01101110-00011100-00101001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z1.h, z2.h }, z14.h // CHECK-ENCODING: [0x29,0x1c,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1c29 <unknown> bfmls za.h[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01011110-01101000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h // CHECK-ENCODING: [0x68,0x5e,0x64,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1645e68 <unknown> bfmls za.h[w10, 0], {z19.h - z20.h}, z4.h // 11000001-01100100-01011110-01101000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z19.h, z20.h }, z4.h // CHECK-ENCODING: [0x68,0x5e,0x64,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1645e68 <unknown> bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00011101-10001000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h // CHECK-ENCODING: [0x88,0x1d,0x62,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1621d88 <unknown> bfmls za.h[w8, 0], {z12.h - z13.h}, z2.h // 11000001-01100010-00011101-10001000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h // CHECK-ENCODING: [0x88,0x1d,0x62,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1621d88 <unknown> bfmls za.h[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01011100-00101001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h // CHECK-ENCODING: [0x29,0x5c,0x6a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16a5c29 <unknown> bfmls za.h[w10, 1], {z1.h - z2.h}, z10.h // 11000001-01101010-01011100-00101001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z1.h, z2.h }, z10.h // CHECK-ENCODING: [0x29,0x5c,0x6a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16a5c29 <unknown> bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00011110-11001101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h // CHECK-ENCODING: [0xcd,0x1e,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1ecd <unknown> bfmls za.h[w8, 5], {z22.h - z23.h}, z14.h // 11000001-01101110-00011110-11001101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h // CHECK-ENCODING: [0xcd,0x1e,0x6e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16e1ecd <unknown> bfmls za.h[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01111101-00101010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h // CHECK-ENCODING: [0x2a,0x7d,0x61,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1617d2a <unknown> bfmls za.h[w11, 2], {z9.h - z10.h}, z1.h // 11000001-01100001-01111101-00101010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z9.h, z10.h }, z1.h // CHECK-ENCODING: [0x2a,0x7d,0x61,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1617d2a <unknown> bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00111101-10001111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h // CHECK-ENCODING: [0x8f,0x3d,0x6b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16b3d8f <unknown> bfmls za.h[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00111101-10001111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h // CHECK-ENCODING: [0x8f,0x3d,0x6b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c16b3d8f <unknown> bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-00010000-00010000-00110000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] // CHECK-ENCODING: [0x30,0x10,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101030 <unknown> bfmls za.h[w8, 0], {z0.h - z1.h}, z0.h[0] // 11000001-00010000-00010000-00110000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] // CHECK-ENCODING: [0x30,0x10,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101030 <unknown> bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, z5.h[2] // 11000001-00010101-01010101-01110101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2] // CHECK-ENCODING: [0x75,0x55,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1155575 <unknown> bfmls za.h[w10, 5], {z10.h - z11.h}, z5.h[2] // 11000001-00010101-01010101-01110101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, z5.h[2] // CHECK-ENCODING: [0x75,0x55,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1155575 <unknown> bfmls za.h[w11, 7, vgx2], {z12.h, z13.h}, z8.h[6] // 11000001-00011000-01111101-10110111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6] // CHECK-ENCODING: [0xb7,0x7d,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1187db7 <unknown> bfmls za.h[w11, 7], {z12.h - z13.h}, z8.h[6] // 11000001-00011000-01111101-10110111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, z8.h[6] // CHECK-ENCODING: [0xb7,0x7d,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1187db7 <unknown> bfmls za.h[w11, 7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001-00011111-01111111-11111111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7] // CHECK-ENCODING: [0xff,0x7f,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11f7fff <unknown> bfmls za.h[w11, 7], {z30.h - z31.h}, z15.h[7] // 11000001-00011111-01111111-11111111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, z15.h[7] // CHECK-ENCODING: [0xff,0x7f,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11f7fff <unknown> bfmls za.h[w8, 5, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001-00010000-00011110-00110101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6] // CHECK-ENCODING: [0x35,0x1e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101e35 <unknown> bfmls za.h[w8, 5], {z16.h - z17.h}, z0.h[6] // 11000001-00010000-00011110-00110101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, z0.h[6] // CHECK-ENCODING: [0x35,0x1e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1101e35 <unknown> bfmls za.h[w8, 1, vgx2], {z0.h, z1.h}, z14.h[2] // 11000001-00011110-00010100-00110001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2] // CHECK-ENCODING: [0x31,0x14,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1431 <unknown> bfmls za.h[w8, 1], {z0.h - z1.h}, z14.h[2] // 11000001-00011110-00010100-00110001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, z14.h[2] // CHECK-ENCODING: [0x31,0x14,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1431 <unknown> bfmls za.h[w10, 0, vgx2], {z18.h, z19.h}, z4.h[3] // 11000001-00010100-01010110-01111000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3] // CHECK-ENCODING: [0x78,0x56,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1145678 <unknown> bfmls za.h[w10, 0], {z18.h - z19.h}, z4.h[3] // 11000001-00010100-01010110-01111000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, z4.h[3] // CHECK-ENCODING: [0x78,0x56,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1145678 <unknown> bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, z2.h[4] // 11000001-00010010-00011001-10110000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4] // CHECK-ENCODING: [0xb0,0x19,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11219b0 <unknown> bfmls za.h[w8, 0], {z12.h - z13.h}, z2.h[4] // 11000001-00010010-00011001-10110000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, z2.h[4] // CHECK-ENCODING: [0xb0,0x19,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11219b0 <unknown> bfmls za.h[w10, 1, vgx2], {z0.h, z1.h}, z10.h[4] // 11000001-00011010-01011000-00110001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4] // CHECK-ENCODING: [0x31,0x58,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11a5831 <unknown> bfmls za.h[w10, 1], {z0.h - z1.h}, z10.h[4] // 11000001-00011010-01011000-00110001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, z10.h[4] // CHECK-ENCODING: [0x31,0x58,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11a5831 <unknown> bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, z14.h[5] // 11000001-00011110-00011010-11111101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5] // CHECK-ENCODING: [0xfd,0x1a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1afd <unknown> bfmls za.h[w8, 5], {z22.h - z23.h}, z14.h[5] // 11000001-00011110-00011010-11111101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, z14.h[5] // CHECK-ENCODING: [0xfd,0x1a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e1afd <unknown> bfmls za.h[w11, 2, vgx2], {z8.h, z9.h}, z1.h[2] // 11000001-00010001-01110101-00110010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2] // CHECK-ENCODING: [0x32,0x75,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1117532 <unknown> bfmls za.h[w11, 2], {z8.h - z9.h}, z1.h[2] // 11000001-00010001-01110101-00110010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, z1.h[2] // CHECK-ENCODING: [0x32,0x75,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1117532 <unknown> bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, z11.h[4] // 11000001-00011011-00111001-10110111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4] // CHECK-ENCODING: [0xb7,0x39,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11b39b7 <unknown> bfmls za.h[w9, 7], {z12.h - z13.h}, z11.h[4] // 11000001-00011011-00111001-10110111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, z11.h[4] // CHECK-ENCODING: [0xb7,0x39,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11b39b7 <unknown> bfmls za.h[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000-00010000-00011000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x18,0x10,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e01018 <unknown> bfmls za.h[w8, 0], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00010000-00011000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x18,0x10,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e01018 <unknown> bfmls za.h[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100-01010001-01011101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x5d,0x51,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f4515d <unknown> bfmls za.h[w10, 5], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01010001-01011101 // CHECK-INST: bfmls za.h[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x5d,0x51,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f4515d <unknown> bfmls za.h[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000-01110001-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x9f,0x71,0xe8,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e8719f <unknown> bfmls za.h[w11, 7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01110001-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } // CHECK-ENCODING: [0x9f,0x71,0xe8,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e8719f <unknown> bfmls za.h[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110-01110011-11011111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xdf,0x73,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe73df <unknown> bfmls za.h[w11, 7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01110011-11011111 // CHECK-INST: bfmls za.h[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xdf,0x73,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe73df <unknown> bfmls za.h[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000-00010010-00011101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } // CHECK-ENCODING: [0x1d,0x12,0xf0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f0121d <unknown> bfmls za.h[w8, 5], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00010010-00011101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } // CHECK-ENCODING: [0x1d,0x12,0xf0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f0121d <unknown> bfmls za.h[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110-00010000-00011001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x19,0x10,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe1019 <unknown> bfmls za.h[w8, 1], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00010000-00011001 // CHECK-INST: bfmls za.h[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } // CHECK-ENCODING: [0x19,0x10,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe1019 <unknown> bfmls za.h[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100-01010010-01011000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x58,0x52,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f45258 <unknown> bfmls za.h[w10, 0], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01010010-01011000 // CHECK-INST: bfmls za.h[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } // CHECK-ENCODING: [0x58,0x52,0xf4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f45258 <unknown> bfmls za.h[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010-00010001-10011000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } // CHECK-ENCODING: [0x98,0x11,0xe2,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e21198 <unknown> bfmls za.h[w8, 0], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00010001-10011000 // CHECK-INST: bfmls za.h[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } // CHECK-ENCODING: [0x98,0x11,0xe2,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e21198 <unknown> bfmls za.h[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010-01010000-00011001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } // CHECK-ENCODING: [0x19,0x50,0xfa,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fa5019 <unknown> bfmls za.h[w10, 1], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01010000-00011001 // CHECK-INST: bfmls za.h[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } // CHECK-ENCODING: [0x19,0x50,0xfa,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fa5019 <unknown> bfmls za.h[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110-00010010-11011101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xdd,0x12,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe12dd <unknown> bfmls za.h[w8, 5], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00010010-11011101 // CHECK-INST: bfmls za.h[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } // CHECK-ENCODING: [0xdd,0x12,0xfe,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fe12dd <unknown> bfmls za.h[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000-01110001-00011010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x1a,0x71,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e0711a <unknown> bfmls za.h[w11, 2], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01110001-00011010 // CHECK-INST: bfmls za.h[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } // CHECK-ENCODING: [0x1a,0x71,0xe0,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e0711a <unknown> bfmls za.h[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010-00110001-10011111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } // CHECK-ENCODING: [0x9f,0x31,0xea,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1ea319f <unknown> bfmls za.h[w9, 7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00110001-10011111 // CHECK-INST: bfmls za.h[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } // CHECK-ENCODING: [0x9f,0x31,0xea,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1ea319f <unknown> bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00001000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x08,0x1c,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701c08 <unknown> bfmls za.h[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00011100-00001000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h // CHECK-ENCODING: [0x08,0x1c,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701c08 <unknown> bfmls za.h[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01001101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h // CHECK-ENCODING: [0x4d,0x5d,0x75,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1755d4d <unknown> bfmls za.h[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01011101-01001101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z10.h - z13.h }, z5.h // CHECK-ENCODING: [0x4d,0x5d,0x75,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1755d4d <unknown> bfmls za.h[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10101111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h // CHECK-ENCODING: [0xaf,0x7d,0x78,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1787daf <unknown> bfmls za.h[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01111101-10101111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z13.h - z16.h }, z8.h // CHECK-ENCODING: [0xaf,0x7d,0x78,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1787daf <unknown> bfmls za.h[w11, 7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11101111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h // CHECK-ENCODING: [0xef,0x7f,0x7f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17f7fef <unknown> bfmls za.h[w11, 7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01111111-11101111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h // CHECK-ENCODING: [0xef,0x7f,0x7f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17f7fef <unknown> bfmls za.h[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00101101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h // CHECK-ENCODING: [0x2d,0x1e,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701e2d <unknown> bfmls za.h[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00011110-00101101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z17.h - z20.h }, z0.h // CHECK-ENCODING: [0x2d,0x1e,0x70,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1701e2d <unknown> bfmls za.h[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00101001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h // CHECK-ENCODING: [0x29,0x1c,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1c29 <unknown> bfmls za.h[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00011100-00101001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z1.h - z4.h }, z14.h // CHECK-ENCODING: [0x29,0x1c,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1c29 <unknown> bfmls za.h[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01101000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h // CHECK-ENCODING: [0x68,0x5e,0x74,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1745e68 <unknown> bfmls za.h[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01011110-01101000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z19.h - z22.h }, z4.h // CHECK-ENCODING: [0x68,0x5e,0x74,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1745e68 <unknown> bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10001000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h // CHECK-ENCODING: [0x88,0x1d,0x72,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1721d88 <unknown> bfmls za.h[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00011101-10001000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h // CHECK-ENCODING: [0x88,0x1d,0x72,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1721d88 <unknown> bfmls za.h[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00101001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h // CHECK-ENCODING: [0x29,0x5c,0x7a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17a5c29 <unknown> bfmls za.h[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01011100-00101001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z1.h - z4.h }, z10.h // CHECK-ENCODING: [0x29,0x5c,0x7a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17a5c29 <unknown> bfmls za.h[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11001101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h // CHECK-ENCODING: [0xcd,0x1e,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1ecd <unknown> bfmls za.h[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00011110-11001101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z22.h - z25.h }, z14.h // CHECK-ENCODING: [0xcd,0x1e,0x7e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17e1ecd <unknown> bfmls za.h[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00101010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h // CHECK-ENCODING: [0x2a,0x7d,0x71,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1717d2a <unknown> bfmls za.h[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01111101-00101010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z9.h - z12.h }, z1.h // CHECK-ENCODING: [0x2a,0x7d,0x71,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1717d2a <unknown> bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10001111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h // CHECK-ENCODING: [0x8f,0x3d,0x7b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17b3d8f <unknown> bfmls za.h[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00111101-10001111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h // CHECK-ENCODING: [0x8f,0x3d,0x7b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c17b3d8f <unknown> bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00110000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] // CHECK-ENCODING: [0x30,0x90,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109030 <unknown> bfmls za.h[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-00010000-10010000-00110000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] // CHECK-ENCODING: [0x30,0x90,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109030 <unknown> bfmls za.h[w10, 5, vgx4], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00110101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2] // CHECK-ENCODING: [0x35,0xd5,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c115d535 <unknown> bfmls za.h[w10, 5], {z8.h - z11.h}, z5.h[2] // 11000001-00010101-11010101-00110101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, z5.h[2] // CHECK-ENCODING: [0x35,0xd5,0x15,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c115d535 <unknown> bfmls za.h[w11, 7, vgx4], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10110111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6] // CHECK-ENCODING: [0xb7,0xfd,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c118fdb7 <unknown> bfmls za.h[w11, 7], {z12.h - z15.h}, z8.h[6] // 11000001-00011000-11111101-10110111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, z8.h[6] // CHECK-ENCODING: [0xb7,0xfd,0x18,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c118fdb7 <unknown> bfmls za.h[w11, 7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10111111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7] // CHECK-ENCODING: [0xbf,0xff,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11fffbf <unknown> bfmls za.h[w11, 7], {z28.h - z31.h}, z15.h[7] // 11000001-00011111-11111111-10111111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, z15.h[7] // CHECK-ENCODING: [0xbf,0xff,0x1f,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11fffbf <unknown> bfmls za.h[w8, 5, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00110101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6] // CHECK-ENCODING: [0x35,0x9e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109e35 <unknown> bfmls za.h[w8, 5], {z16.h - z19.h}, z0.h[6] // 11000001-00010000-10011110-00110101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, z0.h[6] // CHECK-ENCODING: [0x35,0x9e,0x10,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1109e35 <unknown> bfmls za.h[w8, 1, vgx4], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00110001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2] // CHECK-ENCODING: [0x31,0x94,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9431 <unknown> bfmls za.h[w8, 1], {z0.h - z3.h}, z14.h[2] // 11000001-00011110-10010100-00110001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, z14.h[2] // CHECK-ENCODING: [0x31,0x94,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9431 <unknown> bfmls za.h[w10, 0, vgx4], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00111000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3] // CHECK-ENCODING: [0x38,0xd6,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c114d638 <unknown> bfmls za.h[w10, 0], {z16.h - z19.h}, z4.h[3] // 11000001-00010100-11010110-00111000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, z4.h[3] // CHECK-ENCODING: [0x38,0xd6,0x14,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c114d638 <unknown> bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10110000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4] // CHECK-ENCODING: [0xb0,0x99,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11299b0 <unknown> bfmls za.h[w8, 0], {z12.h - z15.h}, z2.h[4] // 11000001-00010010-10011001-10110000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, z2.h[4] // CHECK-ENCODING: [0xb0,0x99,0x12,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11299b0 <unknown> bfmls za.h[w10, 1, vgx4], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00110001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4] // CHECK-ENCODING: [0x31,0xd8,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11ad831 <unknown> bfmls za.h[w10, 1], {z0.h - z3.h}, z10.h[4] // 11000001-00011010-11011000-00110001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, z10.h[4] // CHECK-ENCODING: [0x31,0xd8,0x1a,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11ad831 <unknown> bfmls za.h[w8, 5, vgx4], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10111101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5] // CHECK-ENCODING: [0xbd,0x9a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9abd <unknown> bfmls za.h[w8, 5], {z20.h - z23.h}, z14.h[5] // 11000001-00011110-10011010-10111101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, z14.h[5] // CHECK-ENCODING: [0xbd,0x9a,0x1e,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11e9abd <unknown> bfmls za.h[w11, 2, vgx4], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00110010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2] // CHECK-ENCODING: [0x32,0xf5,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c111f532 <unknown> bfmls za.h[w11, 2], {z8.h - z11.h}, z1.h[2] // 11000001-00010001-11110101-00110010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, z1.h[2] // CHECK-ENCODING: [0x32,0xf5,0x11,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c111f532 <unknown> bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10110111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4] // CHECK-ENCODING: [0xb7,0xb9,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11bb9b7 <unknown> bfmls za.h[w9, 7], {z12.h - z15.h}, z11.h[4] // 11000001-00011011-10111001-10110111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, z11.h[4] // CHECK-ENCODING: [0xb7,0xb9,0x1b,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c11bb9b7 <unknown> bfmls za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00011000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x18,0x10,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11018 <unknown> bfmls za.h[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010000-00011000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x18,0x10,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11018 <unknown> bfmls za.h[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00011101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x1d,0x51,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f5511d <unknown> bfmls za.h[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010001-00011101 // CHECK-INST: bfmls za.h[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x1d,0x51,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f5511d <unknown> bfmls za.h[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x9f,0x71,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9719f <unknown> bfmls za.h[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110001-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x9f,0x71,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9719f <unknown> bfmls za.h[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x9f,0x73,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd739f <unknown> bfmls za.h[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110011-10011111 // CHECK-INST: bfmls za.h[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x9f,0x73,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd739f <unknown> bfmls za.h[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00011101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } // CHECK-ENCODING: [0x1d,0x12,0xf1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f1121d <unknown> bfmls za.h[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010010-00011101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } // CHECK-ENCODING: [0x1d,0x12,0xf1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f1121d <unknown> bfmls za.h[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00011001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x19,0x10,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd1019 <unknown> bfmls za.h[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010000-00011001 // CHECK-INST: bfmls za.h[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x19,0x10,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd1019 <unknown> bfmls za.h[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00011000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x18,0x52,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f55218 <unknown> bfmls za.h[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010010-00011000 // CHECK-INST: bfmls za.h[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } // CHECK-ENCODING: [0x18,0x52,0xf5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f55218 <unknown> bfmls za.h[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10011000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x98,0x11,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11198 <unknown> bfmls za.h[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010001-10011000 // CHECK-INST: bfmls za.h[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x98,0x11,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e11198 <unknown> bfmls za.h[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00011001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } // CHECK-ENCODING: [0x19,0x50,0xf9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f95019 <unknown> bfmls za.h[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010000-00011001 // CHECK-INST: bfmls za.h[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } // CHECK-ENCODING: [0x19,0x50,0xf9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1f95019 <unknown> bfmls za.h[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10011101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x9d,0x12,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd129d <unknown> bfmls za.h[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010010-10011101 // CHECK-INST: bfmls za.h[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } // CHECK-ENCODING: [0x9d,0x12,0xfd,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1fd129d <unknown> bfmls za.h[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00011010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x1a,0x71,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e1711a <unknown> bfmls za.h[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110001-00011010 // CHECK-INST: bfmls za.h[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } // CHECK-ENCODING: [0x1a,0x71,0xe1,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e1711a <unknown> bfmls za.h[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10011111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x9f,0x31,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9319f <unknown> bfmls za.h[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110001-10011111 // CHECK-INST: bfmls za.h[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } // CHECK-ENCODING: [0x9f,0x31,0xe9,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e9319f <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmopa-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmopa-diagnostics.s index 8b418f4..e6d208d 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmopa-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmopa-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid predicate register diff --git a/llvm/test/MC/AArch64/SME2p1/bfmopa.s b/llvm/test/MC/AArch64/SME2/bfmopa.s index 7a08185..2a9b1e5 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmopa.s +++ b/llvm/test/MC/AArch64/SME2/bfmopa.s @@ -1,84 +1,84 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmopa za0.h, p0/m, p0/m, z0.h, z0.h // 10000001-10100000-00000000-00001000 // CHECK-INST: bfmopa za0.h, p0/m, p0/m, z0.h, z0.h // CHECK-ENCODING: [0x08,0x00,0xa0,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a00008 <unknown> bfmopa za1.h, p5/m, p2/m, z10.h, z21.h // 10000001-10110101-01010101-01001001 // CHECK-INST: bfmopa za1.h, p5/m, p2/m, z10.h, z21.h // CHECK-ENCODING: [0x49,0x55,0xb5,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b55549 <unknown> bfmopa za1.h, p3/m, p7/m, z13.h, z8.h // 10000001-10101000-11101101-10101001 // CHECK-INST: bfmopa za1.h, p3/m, p7/m, z13.h, z8.h // CHECK-ENCODING: [0xa9,0xed,0xa8,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a8eda9 <unknown> bfmopa za1.h, p7/m, p7/m, z31.h, z31.h // 10000001-10111111-11111111-11101001 // CHECK-INST: bfmopa za1.h, p7/m, p7/m, z31.h, z31.h // CHECK-ENCODING: [0xe9,0xff,0xbf,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81bfffe9 <unknown> bfmopa za1.h, p3/m, p0/m, z17.h, z16.h // 10000001-10110000-00001110-00101001 // CHECK-INST: bfmopa za1.h, p3/m, p0/m, z17.h, z16.h // CHECK-ENCODING: [0x29,0x0e,0xb0,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b00e29 <unknown> bfmopa za1.h, p1/m, p4/m, z1.h, z30.h // 10000001-10111110-10000100-00101001 // CHECK-INST: bfmopa za1.h, p1/m, p4/m, z1.h, z30.h // CHECK-ENCODING: [0x29,0x84,0xbe,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81be8429 <unknown> bfmopa za0.h, p5/m, p2/m, z19.h, z20.h // 10000001-10110100-01010110-01101000 // CHECK-INST: bfmopa za0.h, p5/m, p2/m, z19.h, z20.h // CHECK-ENCODING: [0x68,0x56,0xb4,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b45668 <unknown> bfmopa za0.h, p6/m, p0/m, z12.h, z2.h // 10000001-10100010-00011001-10001000 // CHECK-INST: bfmopa za0.h, p6/m, p0/m, z12.h, z2.h // CHECK-ENCODING: [0x88,0x19,0xa2,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a21988 <unknown> bfmopa za1.h, p2/m, p6/m, z1.h, z26.h // 10000001-10111010-11001000-00101001 // CHECK-INST: bfmopa za1.h, p2/m, p6/m, z1.h, z26.h // CHECK-ENCODING: [0x29,0xc8,0xba,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81bac829 <unknown> bfmopa za1.h, p2/m, p0/m, z22.h, z30.h // 10000001-10111110-00001010-11001001 // CHECK-INST: bfmopa za1.h, p2/m, p0/m, z22.h, z30.h // CHECK-ENCODING: [0xc9,0x0a,0xbe,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81be0ac9 <unknown> bfmopa za0.h, p5/m, p7/m, z9.h, z1.h // 10000001-10100001-11110101-00101000 // CHECK-INST: bfmopa za0.h, p5/m, p7/m, z9.h, z1.h // CHECK-ENCODING: [0x28,0xf5,0xa1,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a1f528 <unknown> bfmopa za1.h, p2/m, p5/m, z12.h, z11.h // 10000001-10101011-10101001-10001001 // CHECK-INST: bfmopa za1.h, p2/m, p5/m, z12.h, z11.h // CHECK-ENCODING: [0x89,0xa9,0xab,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81aba989 <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfmops-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmops-diagnostics.s index 84275af..14c25de 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmops-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfmops-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid predicate register diff --git a/llvm/test/MC/AArch64/SME2p1/bfmops.s b/llvm/test/MC/AArch64/SME2/bfmops.s index 380c65f..2b76986 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfmops.s +++ b/llvm/test/MC/AArch64/SME2/bfmops.s @@ -1,84 +1,84 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfmops za0.h, p0/m, p0/m, z0.h, z0.h // 10000001-10100000-00000000-00011000 // CHECK-INST: bfmops za0.h, p0/m, p0/m, z0.h, z0.h // CHECK-ENCODING: [0x18,0x00,0xa0,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a00018 <unknown> bfmops za1.h, p5/m, p2/m, z10.h, z21.h // 10000001-10110101-01010101-01011001 // CHECK-INST: bfmops za1.h, p5/m, p2/m, z10.h, z21.h // CHECK-ENCODING: [0x59,0x55,0xb5,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b55559 <unknown> bfmops za1.h, p3/m, p7/m, z13.h, z8.h // 10000001-10101000-11101101-10111001 // CHECK-INST: bfmops za1.h, p3/m, p7/m, z13.h, z8.h // CHECK-ENCODING: [0xb9,0xed,0xa8,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a8edb9 <unknown> bfmops za1.h, p7/m, p7/m, z31.h, z31.h // 10000001-10111111-11111111-11111001 // CHECK-INST: bfmops za1.h, p7/m, p7/m, z31.h, z31.h // CHECK-ENCODING: [0xf9,0xff,0xbf,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81bffff9 <unknown> bfmops za1.h, p3/m, p0/m, z17.h, z16.h // 10000001-10110000-00001110-00111001 // CHECK-INST: bfmops za1.h, p3/m, p0/m, z17.h, z16.h // CHECK-ENCODING: [0x39,0x0e,0xb0,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b00e39 <unknown> bfmops za1.h, p1/m, p4/m, z1.h, z30.h // 10000001-10111110-10000100-00111001 // CHECK-INST: bfmops za1.h, p1/m, p4/m, z1.h, z30.h // CHECK-ENCODING: [0x39,0x84,0xbe,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81be8439 <unknown> bfmops za0.h, p5/m, p2/m, z19.h, z20.h // 10000001-10110100-01010110-01111000 // CHECK-INST: bfmops za0.h, p5/m, p2/m, z19.h, z20.h // CHECK-ENCODING: [0x78,0x56,0xb4,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81b45678 <unknown> bfmops za0.h, p6/m, p0/m, z12.h, z2.h // 10000001-10100010-00011001-10011000 // CHECK-INST: bfmops za0.h, p6/m, p0/m, z12.h, z2.h // CHECK-ENCODING: [0x98,0x19,0xa2,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a21998 <unknown> bfmops za1.h, p2/m, p6/m, z1.h, z26.h // 10000001-10111010-11001000-00111001 // CHECK-INST: bfmops za1.h, p2/m, p6/m, z1.h, z26.h // CHECK-ENCODING: [0x39,0xc8,0xba,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81bac839 <unknown> bfmops za1.h, p2/m, p0/m, z22.h, z30.h // 10000001-10111110-00001010-11011001 // CHECK-INST: bfmops za1.h, p2/m, p0/m, z22.h, z30.h // CHECK-ENCODING: [0xd9,0x0a,0xbe,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81be0ad9 <unknown> bfmops za0.h, p5/m, p7/m, z9.h, z1.h // 10000001-10100001-11110101-00111000 // CHECK-INST: bfmops za0.h, p5/m, p7/m, z9.h, z1.h // CHECK-ENCODING: [0x38,0xf5,0xa1,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81a1f538 <unknown> bfmops za1.h, p2/m, p5/m, z12.h, z11.h // 10000001-10101011-10101001-10011001 // CHECK-INST: bfmops za1.h, p2/m, p5/m, z12.h, z11.h // CHECK-ENCODING: [0x99,0xa9,0xab,0x81] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: 81aba999 <unknown> diff --git a/llvm/test/MC/AArch64/SME2p1/bfsub-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfsub-diagnostics.s index 9d3680e..5dade3e 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfsub-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/bfsub-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Out of range index offset diff --git a/llvm/test/MC/AArch64/SME2p1/bfsub.s b/llvm/test/MC/AArch64/SME2/bfsub.s index dac5f97..6cfd507 100644 --- a/llvm/test/MC/AArch64/SME2p1/bfsub.s +++ b/llvm/test/MC/AArch64/SME2/bfsub.s @@ -1,300 +1,300 @@ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p1,+b16b16 < %s \ -// RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+b16b16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST bfsub za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00001000 // CHECK-INST: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x08,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c08 <unknown> bfsub za.h[w8, 0], {z0.h - z1.h} // 11000001-11100100-00011100-00001000 // CHECK-INST: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x08,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c08 <unknown> bfsub za.h[w10, 5, vgx2], {z10.h, z11.h} // 11000001-11100100-01011101-01001101 // CHECK-INST: bfsub za.h[w10, 5, vgx2], { z10.h, z11.h } // CHECK-ENCODING: [0x4d,0x5d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45d4d <unknown> bfsub za.h[w10, 5], {z10.h - z11.h} // 11000001-11100100-01011101-01001101 // CHECK-INST: bfsub za.h[w10, 5, vgx2], { z10.h, z11.h } // CHECK-ENCODING: [0x4d,0x5d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45d4d <unknown> bfsub za.h[w11, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-01111101-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x8f,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d8f <unknown> bfsub za.h[w11, 7], {z12.h - z13.h} // 11000001-11100100-01111101-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x8f,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d8f <unknown> bfsub za.h[w11, 7, vgx2], {z30.h, z31.h} // 11000001-11100100-01111111-11001111 // CHECK-INST: bfsub za.h[w11, 7, vgx2], { z30.h, z31.h } // CHECK-ENCODING: [0xcf,0x7f,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47fcf <unknown> bfsub za.h[w11, 7], {z30.h - z31.h} // 11000001-11100100-01111111-11001111 // CHECK-INST: bfsub za.h[w11, 7, vgx2], { z30.h, z31.h } // CHECK-ENCODING: [0xcf,0x7f,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47fcf <unknown> bfsub za.h[w8, 5, vgx2], {z16.h, z17.h} // 11000001-11100100-00011110-00001101 // CHECK-INST: bfsub za.h[w8, 5, vgx2], { z16.h, z17.h } // CHECK-ENCODING: [0x0d,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41e0d <unknown> bfsub za.h[w8, 5], {z16.h - z17.h} // 11000001-11100100-00011110-00001101 // CHECK-INST: bfsub za.h[w8, 5, vgx2], { z16.h, z17.h } // CHECK-ENCODING: [0x0d,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41e0d <unknown> bfsub za.h[w8, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-00011100-00001001 // CHECK-INST: bfsub za.h[w8, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x09,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c09 <unknown> bfsub za.h[w8, 1], {z0.h - z1.h} // 11000001-11100100-00011100-00001001 // CHECK-INST: bfsub za.h[w8, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x09,0x1c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41c09 <unknown> bfsub za.h[w10, 0, vgx2], {z18.h, z19.h} // 11000001-11100100-01011110, 01001000 // CHECK-INST: bfsub za.h[w10, 0, vgx2], { z18.h, z19.h } // CHECK-ENCODING: [0x48,0x5e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45e48 <unknown> bfsub za.h[w10, 0], {z18.h - z19.h} // 11000001-11100100-01011110-01001000 // CHECK-INST: bfsub za.h[w10, 0, vgx2], { z18.h, z19.h } // CHECK-ENCODING: [0x48,0x5e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45e48 <unknown> bfsub za.h[w8, 0, vgx2], {z12.h, z13.h} // 11000001-11100100-00011101-10001000 // CHECK-INST: bfsub za.h[w8, 0, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x88,0x1d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41d88 <unknown> bfsub za.h[w8, 0], {z12.h - z13.h} // 11000001-11100100-00011101-10001000 // CHECK-INST: bfsub za.h[w8, 0, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x88,0x1d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41d88 <unknown> bfsub za.h[w10, 1, vgx2], {z0.h, z1.h} // 11000001-11100100-01011100-00001001 // CHECK-INST: bfsub za.h[w10, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x09,0x5c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45c09 <unknown> bfsub za.h[w10, 1], {z0.h - z1.h} // 11000001-11100100-01011100-00001001 // CHECK-INST: bfsub za.h[w10, 1, vgx2], { z0.h, z1.h } // CHECK-ENCODING: [0x09,0x5c,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e45c09 <unknown> bfsub za.h[w8, 5, vgx2], {z22.h, z23.h} // 11000001-11100100-00011110, 11001101 // CHECK-INST: bfsub za.h[w8, 5, vgx2], { z22.h, z23.h } // CHECK-ENCODING: [0xcd,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41ecd <unknown> bfsub za.h[w8, 5], {z22.h - z23.h} // 11000001-11100100-00011110-11001101 // CHECK-INST: bfsub za.h[w8, 5, vgx2], { z22.h, z23.h } // CHECK-ENCODING: [0xcd,0x1e,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e41ecd <unknown> bfsub za.h[w11, 2, vgx2], {z8.h, z9.h} // 11000001-11100100-01111101-00001010 // CHECK-INST: bfsub za.h[w11, 2, vgx2], { z8.h, z9.h } // CHECK-ENCODING: [0x0a,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d0a <unknown> bfsub za.h[w11, 2], {z8.h - z9.h} // 11000001-11100100-01111101-00001010 // CHECK-INST: bfsub za.h[w11, 2, vgx2], { z8.h, z9.h } // CHECK-ENCODING: [0x0a,0x7d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e47d0a <unknown> bfsub za.h[w9, 7, vgx2], {z12.h, z13.h} // 11000001-11100100-00111101-10001111 // CHECK-INST: bfsub za.h[w9, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x8f,0x3d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e43d8f <unknown> bfsub za.h[w9, 7], {z12.h - z13.h} // 11000001-11100100-00111101-10001111 // CHECK-INST: bfsub za.h[w9, 7, vgx2], { z12.h, z13.h } // CHECK-ENCODING: [0x8f,0x3d,0xe4,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e43d8f <unknown> bfsub za.h[w8, 0, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00001000 // CHECK-INST: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x08,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c08 <unknown> bfsub za.h[w8, 0], {z0.h - z3.h} // 11000001-11100101-00011100-00001000 // CHECK-INST: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x08,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c08 <unknown> bfsub za.h[w10, 5, vgx4], {z8.h - z11.h} // 11000001-11100101-01011101-00001101 // CHECK-INST: bfsub za.h[w10, 5, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x0d,0x5d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55d0d <unknown> bfsub za.h[w10, 5], {z8.h - z11.h} // 11000001-11100101-01011101-00001101 // CHECK-INST: bfsub za.h[w10, 5, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x0d,0x5d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55d0d <unknown> bfsub za.h[w11, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-01111101-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x8f,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d8f <unknown> bfsub za.h[w11, 7], {z12.h - z15.h} // 11000001-11100101-01111101-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x8f,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d8f <unknown> bfsub za.h[w11, 7, vgx4], {z28.h - z31.h} // 11000001-11100101-01111111-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx4], { z28.h - z31.h } // CHECK-ENCODING: [0x8f,0x7f,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57f8f <unknown> bfsub za.h[w11, 7], {z28.h - z31.h} // 11000001-11100101-01111111-10001111 // CHECK-INST: bfsub za.h[w11, 7, vgx4], { z28.h - z31.h } // CHECK-ENCODING: [0x8f,0x7f,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57f8f <unknown> bfsub za.h[w8, 5, vgx4], {z16.h - z19.h} // 11000001-11100101-00011110-00001101 // CHECK-INST: bfsub za.h[w8, 5, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x0d,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e0d <unknown> bfsub za.h[w8, 5], {z16.h - z19.h} // 11000001-11100101-00011110-00001101 // CHECK-INST: bfsub za.h[w8, 5, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x0d,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e0d <unknown> bfsub za.h[w8, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-00011100-00001001 // CHECK-INST: bfsub za.h[w8, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x09,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c09 <unknown> bfsub za.h[w8, 1], {z0.h - z3.h} // 11000001-11100101-00011100-00001001 // CHECK-INST: bfsub za.h[w8, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x09,0x1c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51c09 <unknown> bfsub za.h[w10, 0, vgx4], {z16.h - z19.h} // 11000001-11100101-01011110-00001000 // CHECK-INST: bfsub za.h[w10, 0, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x08,0x5e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55e08 <unknown> bfsub za.h[w10, 0], {z16.h - z19.h} // 11000001-11100101-01011110-00001000 // CHECK-INST: bfsub za.h[w10, 0, vgx4], { z16.h - z19.h } // CHECK-ENCODING: [0x08,0x5e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55e08 <unknown> bfsub za.h[w8, 0, vgx4], {z12.h - z15.h} // 11000001-11100101-00011101-10001000 // CHECK-INST: bfsub za.h[w8, 0, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x88,0x1d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51d88 <unknown> bfsub za.h[w8, 0], {z12.h - z15.h} // 11000001-11100101-00011101-10001000 // CHECK-INST: bfsub za.h[w8, 0, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x88,0x1d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51d88 <unknown> bfsub za.h[w10, 1, vgx4], {z0.h - z3.h} // 11000001-11100101-01011100-00001001 // CHECK-INST: bfsub za.h[w10, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x09,0x5c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55c09 <unknown> bfsub za.h[w10, 1], {z0.h - z3.h} // 11000001-11100101-01011100-00001001 // CHECK-INST: bfsub za.h[w10, 1, vgx4], { z0.h - z3.h } // CHECK-ENCODING: [0x09,0x5c,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e55c09 <unknown> bfsub za.h[w8, 5, vgx4], {z20.h - z23.h} // 11000001-11100101-00011110-10001101 // CHECK-INST: bfsub za.h[w8, 5, vgx4], { z20.h - z23.h } // CHECK-ENCODING: [0x8d,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e8d <unknown> bfsub za.h[w8, 5], {z20.h - z23.h} // 11000001-11100101-00011110-10001101 // CHECK-INST: bfsub za.h[w8, 5, vgx4], { z20.h - z23.h } // CHECK-ENCODING: [0x8d,0x1e,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e51e8d <unknown> bfsub za.h[w11, 2, vgx4], {z8.h - z11.h} // 11000001-11100101-01111101-00001010 // CHECK-INST: bfsub za.h[w11, 2, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x0a,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d0a <unknown> bfsub za.h[w11, 2], {z8.h - z11.h} // 11000001-11100101-01111101-00001010 // CHECK-INST: bfsub za.h[w11, 2, vgx4], { z8.h - z11.h } // CHECK-ENCODING: [0x0a,0x7d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e57d0a <unknown> bfsub za.h[w9, 7, vgx4], {z12.h - z15.h} // 11000001-11100101-00111101-10001111 // CHECK-INST: bfsub za.h[w9, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x8f,0x3d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e53d8f <unknown> bfsub za.h[w9, 7], {z12.h - z15.h} // 11000001-11100101-00111101-10001111 // CHECK-INST: bfsub za.h[w9, 7, vgx4], { z12.h - z15.h } // CHECK-ENCODING: [0x8f,0x3d,0xe5,0xc1] -// CHECK-ERROR: instruction requires: b16b16 sme2p1 +// CHECK-ERROR: instruction requires: b16b16 sme2 // CHECK-UNKNOWN: c1e53d8f <unknown> diff --git a/llvm/test/MC/AArch64/SVE2p1/pmov.s b/llvm/test/MC/AArch64/SVE2p1/pmov.s index ca2b55b..8c08cf7 100644 --- a/llvm/test/MC/AArch64/SVE2p1/pmov.s +++ b/llvm/test/MC/AArch64/SVE2p1/pmov.s @@ -20,6 +20,12 @@ pmov p0.h, z0[0] // 00000101-00101100-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 052c3800 <unknown> +pmov p0.h, z0 // 00000101-00101100-00111000-00000000 +// CHECK-INST: pmov p0.h, z0[0] +// CHECK-ENCODING: [0x00,0x38,0x2c,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 052c3800 <unknown> + pmov p5.h, z10[0] // 00000101-00101100-00111001-01000101 // CHECK-INST: pmov p5.h, z10[0] // CHECK-ENCODING: [0x45,0x39,0x2c,0x05] @@ -44,6 +50,12 @@ pmov p0.s, z0[0] // 00000101-01101000-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 05683800 <unknown> +pmov p0.s, z0 // 00000101-01101000-00111000-00000000 +// CHECK-INST: pmov p0.s, z0[0] +// CHECK-ENCODING: [0x00,0x38,0x68,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 05683800 <unknown> + pmov p5.s, z10[2] // 00000101-01101100-00111001-01000101 // CHECK-INST: pmov p5.s, z10[2] // CHECK-ENCODING: [0x45,0x39,0x6c,0x05] @@ -68,6 +80,12 @@ pmov p0.d, z0[0] // 00000101-10101000-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 05a83800 <unknown> +pmov p0.d, z0 // 00000101-10101000-00111000-00000000 +// CHECK-INST: pmov p0.d, z0[0] +// CHECK-ENCODING: [0x00,0x38,0xa8,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 05a83800 <unknown> + pmov p5.d, z10[6] // 00000101-11101100-00111001-01000101 // CHECK-INST: pmov p5.d, z10[6] // CHECK-ENCODING: [0x45,0x39,0xec,0x05] @@ -122,6 +140,12 @@ pmov z0[0], p0.h // 00000101-00101101-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 052d3800 <unknown> +pmov z0, p0.h // 00000101-00101101-00111000-00000000 +// CHECK-INST: pmov z0[0], p0.h +// CHECK-ENCODING: [0x00,0x38,0x2d,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 052d3800 <unknown> + pmov z21[0], p10.h // 00000101-00101101-00111001-01010101 // CHECK-INST: pmov z21[0], p10.h // CHECK-ENCODING: [0x55,0x39,0x2d,0x05] @@ -147,6 +171,12 @@ pmov z0[0], p0.s // 00000101-01101001-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 05693800 <unknown> +pmov z0, p0.s // 00000101-01101001-00111000-00000000 +// CHECK-INST: pmov z0[0], p0.s +// CHECK-ENCODING: [0x00,0x38,0x69,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 05693800 <unknown> + pmov z21[2], p10.s // 00000101-01101101-00111001-01010101 // CHECK-INST: pmov z21[2], p10.s // CHECK-ENCODING: [0x55,0x39,0x6d,0x05] @@ -171,6 +201,12 @@ pmov z0[0], p0.d // 00000101-10101001-00111000-00000000 // CHECK-ERROR: instruction requires: sme2p1 or sve2p1 // CHECK-UNKNOWN: 05a93800 <unknown> +pmov z0, p0.d // 00000101-10101001-00111000-00000000 +// CHECK-INST: pmov z0[0], p0.d +// CHECK-ENCODING: [0x00,0x38,0xa9,0x05] +// CHECK-ERROR: instruction requires: sme2p1 or sve2p1 +// CHECK-UNKNOWN: 05a93800 <unknown> + pmov z21[6], p10.d // 00000101-11101101-00111001-01010101 // CHECK-INST: pmov z21[6], p10.d // CHECK-ENCODING: [0x55,0x39,0xed,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s new file mode 100644 index 0000000..dbd732f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s @@ -0,0 +1,199 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s + +ds_direct_load v1 wait_va_vdst:15 +// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce] + +ds_direct_load v2 wait_va_vdst:14 +// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce] + +ds_direct_load v3 wait_va_vdst:13 +// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce] + +ds_direct_load v4 wait_va_vdst:12 +// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce] + +ds_direct_load v5 wait_va_vdst:11 +// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce] + +ds_direct_load v6 wait_va_vdst:10 +// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce] + +ds_direct_load v7 wait_va_vdst:9 +// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce] + +ds_direct_load v8 wait_va_vdst:8 +// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce] + +ds_direct_load v9 wait_va_vdst:7 +// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce] + +ds_direct_load v10 wait_va_vdst:6 +// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce] + +ds_direct_load v11 wait_va_vdst:5 +// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce] + +ds_direct_load v12 wait_va_vdst:4 +// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce] + +ds_direct_load v13 wait_va_vdst:3 +// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce] + +ds_direct_load v14 wait_va_vdst:2 +// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce] + +ds_direct_load v15 wait_va_vdst:1 +// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce] + +ds_direct_load v16 wait_va_vdst:0 +// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce] + +ds_direct_load v17 +// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce] + +ds_param_load v1, attr0.x wait_va_vdst:15 +// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce] + +ds_param_load v2, attr0.y wait_va_vdst:14 +// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce] + +ds_param_load v3, attr0.z wait_va_vdst:13 +// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce] + +ds_param_load v4, attr0.w wait_va_vdst:12 +// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce] + +ds_param_load v5, attr0.x wait_va_vdst:11 +// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce] + +ds_param_load v6, attr1.x wait_va_vdst:10 +// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce] + +ds_param_load v7, attr2.y wait_va_vdst:9 +// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce] + +ds_param_load v8, attr3.z wait_va_vdst:8 +// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce] + +ds_param_load v9, attr4.w wait_va_vdst:7 +// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce] + +ds_param_load v10, attr11.x wait_va_vdst:6 +// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce] + +ds_param_load v11, attr22.y wait_va_vdst:5 +// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce] + +ds_param_load v13, attr32.x wait_va_vdst:3 +// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x80,0x03,0xce] + +ds_param_load v14, attr32.y wait_va_vdst:2 +// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x81,0x02,0xce] + +ds_param_load v15, attr32.z wait_va_vdst:1 +// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x82,0x01,0xce] + +ds_param_load v16, attr32.w wait_va_vdst:0 +// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x83,0x00,0xce] + +ds_param_load v17, attr32.w +// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x83,0x00,0xce] + +ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 +// GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce] + +ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 +// GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce] + +ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 +// GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce] + +ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 +// GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce] + +ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 +// GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce] + +ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 +// GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce] + +ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 +// GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce] + +ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 +// GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce] + +ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 +// GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce] + +ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 +// GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce] + +ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 +// GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce] + +ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 +// GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce] + +ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 +// GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce] + +ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 +// GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce] + +ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 +// GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce] + +ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 +// GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce] + +ds_direct_load v17 wait_vm_vsrc:1 +// GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce] + +ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 +// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce] + +ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 +// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce] + +ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 +// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce] + +ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 +// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce] + +ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 +// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce] + +ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 +// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce] + +ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 +// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce] + +ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 +// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce] + +ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 +// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce] + +ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 +// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce] + +ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 +// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce] + +ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 +// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x80,0x83,0xce] + +ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 +// GFX12: ds_param_load v14, attr32.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x81,0x82,0xce] + +ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 +// GFX12: ds_param_load v15, attr32.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x82,0x81,0xce] + +ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 +// GFX12: ds_param_load v16, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x83,0x80,0xce] + +ds_param_load v17, attr32.w wait_vm_vsrc:1 +// GFX12: ds_param_load v17, attr32.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x83,0x80,0xce] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt new file mode 100644 index 0000000..b7c0394 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt @@ -0,0 +1,206 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s + +# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce] +0x0a,0x00,0x16,0xce + +# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x00,0x15,0xce] +0x0b,0x00,0x15,0xce + +# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x00,0x14,0xce] +0x0c,0x00,0x14,0xce + +# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0x00,0x13,0xce] +0x0d,0x00,0x13,0xce + +# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0x00,0x12,0xce] +0x0e,0x00,0x12,0xce + +# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0x00,0x11,0xce] +0x0f,0x00,0x11,0xce + +# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0x00,0x10,0xce] +0x10,0x00,0x10,0xce + +# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0x00,0x10,0xce] +0x11,0x00,0x10,0xce + +# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x1f,0xce] +0x01,0x00,0x1f,0xce + +# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x00,0x1e,0xce] +0x02,0x00,0x1e,0xce + +# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x00,0x1d,0xce] +0x03,0x00,0x1d,0xce + +# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x00,0x1c,0xce] +0x04,0x00,0x1c,0xce + +# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x1b,0xce] +0x05,0x00,0x1b,0xce + +# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x00,0x1a,0xce] +0x06,0x00,0x1a,0xce + +# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x00,0x19,0xce] +0x07,0x00,0x19,0xce + +# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x00,0x18,0xce] +0x08,0x00,0x18,0xce + +# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x00,0x17,0xce] +0x09,0x00,0x17,0xce + +# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x2c,0x06,0xce] +0x0a,0x2c,0x06,0xce + +# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:0 ; encoding: [0x0b,0x59,0x05,0xce] +0x0b,0x59,0x05,0xce + +# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:0 ; encoding: [0x0c,0x86,0x04,0xce] +0x0c,0x86,0x04,0xce + +# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:0 ; encoding: [0x0d,0xfc,0x03,0xce] +0x0d,0xfc,0x03,0xce + +# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:0 ; encoding: [0x0e,0xfd,0x02,0xce] +0x0e,0xfd,0x02,0xce + +# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:0 ; encoding: [0x0f,0xfe,0x01,0xce] +0x0f,0xfe,0x01,0xce + +# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x10,0xff,0x00,0xce] +0x10,0xff,0x00,0xce + +# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:0 ; encoding: [0x11,0xff,0x00,0xce] +0x11,0xff,0x00,0xce + +# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:0 ; encoding: [0x01,0x00,0x0f,0xce] +0x01,0x00,0x0f,0xce + +# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:0 ; encoding: [0x02,0x01,0x0e,0xce] +0x02,0x01,0x0e,0xce + +# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:0 ; encoding: [0x03,0x02,0x0d,0xce] +0x03,0x02,0x0d,0xce + +# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:0 ; encoding: [0x04,0x03,0x0c,0xce] +0x04,0x03,0x0c,0xce + +# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:0 ; encoding: [0x05,0x00,0x0b,0xce] +0x05,0x00,0x0b,0xce + +# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:0 ; encoding: [0x06,0x04,0x0a,0xce] +0x06,0x04,0x0a,0xce + +# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:0 ; encoding: [0x07,0x09,0x09,0xce] +0x07,0x09,0x09,0xce + +# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:0 ; encoding: [0x08,0x0e,0x08,0xce] +0x08,0x0e,0x08,0xce + +# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:0 ; encoding: [0x09,0x13,0x07,0xce] +0x09,0x13,0x07,0xce + +# GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x00,0x96,0xce] +0x0a,0x00,0x96,0xce + +# GFX12: ds_direct_load v11 wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x00,0x95,0xce] +0x0b,0x00,0x95,0xce + +# GFX12: ds_direct_load v12 wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x00,0x94,0xce] +0x0c,0x00,0x94,0xce + +# GFX12: ds_direct_load v13 wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0x00,0x93,0xce] +0x0d,0x00,0x93,0xce + +# GFX12: ds_direct_load v14 wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0x00,0x92,0xce] +0x0e,0x00,0x92,0xce + +# GFX12: ds_direct_load v15 wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0x00,0x91,0xce] +0x0f,0x00,0x91,0xce + +# GFX12: ds_direct_load v16 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0x00,0x90,0xce] +0x10,0x00,0x90,0xce + +# GFX12: ds_direct_load v17 wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0x00,0x90,0xce] +0x11,0x00,0x90,0xce + +# GFX12: ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x9f,0xce] +0x01,0x00,0x9f,0xce + +# GFX12: ds_direct_load v2 wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x00,0x9e,0xce] +0x02,0x00,0x9e,0xce + +# GFX12: ds_direct_load v3 wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x00,0x9d,0xce] +0x03,0x00,0x9d,0xce + +# GFX12: ds_direct_load v4 wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x00,0x9c,0xce] +0x04,0x00,0x9c,0xce + +# GFX12: ds_direct_load v5 wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x9b,0xce] +0x05,0x00,0x9b,0xce + +# GFX12: ds_direct_load v6 wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x00,0x9a,0xce] +0x06,0x00,0x9a,0xce + +# GFX12: ds_direct_load v7 wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x00,0x99,0xce] +0x07,0x00,0x99,0xce + +# GFX12: ds_direct_load v8 wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x00,0x98,0xce] +0x08,0x00,0x98,0xce + +# GFX12: ds_direct_load v9 wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x00,0x97,0xce] +0x09,0x00,0x97,0xce + +# GFX12: ds_param_load v10, attr11.x wait_va_vdst:6 wait_vm_vsrc:1 ; encoding: [0x0a,0x2c,0x86,0xce] +0x0a,0x2c,0x86,0xce + +# GFX12: ds_param_load v11, attr22.y wait_va_vdst:5 wait_vm_vsrc:1 ; encoding: [0x0b,0x59,0x85,0xce] +0x0b,0x59,0x85,0xce + +# GFX12: ds_param_load v12, attr33.z wait_va_vdst:4 wait_vm_vsrc:1 ; encoding: [0x0c,0x86,0x84,0xce] +0x0c,0x86,0x84,0xce + +# GFX12: ds_param_load v13, attr63.x wait_va_vdst:3 wait_vm_vsrc:1 ; encoding: [0x0d,0xfc,0x83,0xce] +0x0d,0xfc,0x83,0xce + +# GFX12: ds_param_load v14, attr63.y wait_va_vdst:2 wait_vm_vsrc:1 ; encoding: [0x0e,0xfd,0x82,0xce] +0x0e,0xfd,0x82,0xce + +# GFX12: ds_param_load v15, attr63.z wait_va_vdst:1 wait_vm_vsrc:1 ; encoding: [0x0f,0xfe,0x81,0xce] +0x0f,0xfe,0x81,0xce + +# GFX12: ds_param_load v16, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x10,0xff,0x80,0xce] +0x10,0xff,0x80,0xce + +# GFX12: ds_param_load v17, attr63.w wait_va_vdst:0 wait_vm_vsrc:1 ; encoding: [0x11,0xff,0x80,0xce] +0x11,0xff,0x80,0xce + +# GFX12: ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 ; encoding: [0x01,0x00,0x8f,0xce] +0x01,0x00,0x8f,0xce + +# GFX12: ds_param_load v2, attr0.y wait_va_vdst:14 wait_vm_vsrc:1 ; encoding: [0x02,0x01,0x8e,0xce] +0x02,0x01,0x8e,0xce + +# GFX12: ds_param_load v3, attr0.z wait_va_vdst:13 wait_vm_vsrc:1 ; encoding: [0x03,0x02,0x8d,0xce] +0x03,0x02,0x8d,0xce + +# GFX12: ds_param_load v4, attr0.w wait_va_vdst:12 wait_vm_vsrc:1 ; encoding: [0x04,0x03,0x8c,0xce] +0x04,0x03,0x8c,0xce + +# GFX12: ds_param_load v5, attr0.x wait_va_vdst:11 wait_vm_vsrc:1 ; encoding: [0x05,0x00,0x8b,0xce] +0x05,0x00,0x8b,0xce + +# GFX12: ds_param_load v6, attr1.x wait_va_vdst:10 wait_vm_vsrc:1 ; encoding: [0x06,0x04,0x8a,0xce] +0x06,0x04,0x8a,0xce + +# GFX12: ds_param_load v7, attr2.y wait_va_vdst:9 wait_vm_vsrc:1 ; encoding: [0x07,0x09,0x89,0xce] +0x07,0x09,0x89,0xce + +# GFX12: ds_param_load v8, attr3.z wait_va_vdst:8 wait_vm_vsrc:1 ; encoding: [0x08,0x0e,0x88,0xce] +0x08,0x0e,0x88,0xce + +# GFX12: ds_param_load v9, attr4.w wait_va_vdst:7 wait_vm_vsrc:1 ; encoding: [0x09,0x13,0x87,0xce] +0x09,0x13,0x87,0xce diff --git a/llvm/test/MC/LoongArch/Misc/cfi-advance.s b/llvm/test/MC/LoongArch/Misc/cfi-advance.s new file mode 100644 index 0000000..662c43e --- /dev/null +++ b/llvm/test/MC/LoongArch/Misc/cfi-advance.s @@ -0,0 +1,27 @@ +# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=-relax %s -o %t.o +# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOC %s +# RUN: llvm-dwarfdump --debug-frame %t.o | FileCheck --check-prefix=DWARFDUMP %s + +# RELOC: Relocations [ +# RELOC-NEXT: .rela.eh_frame { +# RELOC-NEXT: 0x1C R_LARCH_32_PCREL .text 0x0 +# RELOC-NEXT: } +# RELOC-NEXT: ] +# DWARFDUMP: DW_CFA_advance_loc: 4 +# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 +# DWARFDUMP-NEXT: DW_CFA_advance_loc: 8 +# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 + + .text + .globl test + .p2align 2 + .type test,@function +test: + .cfi_startproc + nop + .cfi_def_cfa_offset 8 + .p2align 3 + nop + .cfi_def_cfa_offset 8 + nop + .cfi_endproc diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s index c4454f5..1492265 100644 --- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s @@ -23,14 +23,6 @@ # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 # RELAX-NEXT: } # RELAX-NEXT: Section ({{.*}}) .rela.data { -# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 -# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0 -# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0 -# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0 -# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0 -# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 -# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 -# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 # RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 # RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 # RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 @@ -43,8 +35,8 @@ # RELAX-NEXT: ] # RELAX: Hex dump of section '.data': -# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 -# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 +# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c +# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000 # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 .text @@ -63,13 +55,12 @@ .short .L2 - .L1 .word .L2 - .L1 .dword .L2 - .L1 -## With relaxation, emit relocs because of the .align making the diff variable. -## TODO Handle alignment directive. Why they emit relocs now? They returns -## without folding symbols offset in AttemptToFoldSymbolOffsetDifference(). +## TODO Handle alignment directive. .byte .L3 - .L2 .short .L3 - .L2 .word .L3 - .L2 .dword .L3 - .L2 +## With relaxation, emit relocs because the la.pcrel makes the diff variable. .byte .L4 - .L3 .short .L4 - .L3 .word .L4 - .L3 diff --git a/llvm/test/MC/RISCV/cfi-advance.s b/llvm/test/MC/RISCV/cfi-advance.s index d9224fd..c4af390 100644 --- a/llvm/test/MC/RISCV/cfi-advance.s +++ b/llvm/test/MC/RISCV/cfi-advance.s @@ -5,6 +5,8 @@ # CHECK: .rela.eh_frame { # CHECK-NEXT: 0x1C R_RISCV_32_PCREL <null> 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SET6 <null> 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SUB6 <null> 0x0 # CHECK-NEXT: } # CHECK-DWARFDUMP: DW_CFA_advance_loc1: 104 # CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 @@ -12,6 +14,8 @@ # CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 # CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc4: 65539 # CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 +# CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc: 10 +# CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 .text .globl test # -- Begin function test .p2align 1 @@ -28,4 +32,7 @@ test: .zero 65535, 0x90 .cfi_def_cfa_offset 8 nop + .p2align 3 + .cfi_def_cfa_offset 8 + nop .cfi_endproc diff --git a/llvm/test/MC/RISCV/fixups-expr.s b/llvm/test/MC/RISCV/fixups-expr.s index 8a02d29..63e7f2e 100644 --- a/llvm/test/MC/RISCV/fixups-expr.s +++ b/llvm/test/MC/RISCV/fixups-expr.s @@ -16,11 +16,15 @@ .globl G1 .globl G2 +.globl G3 .L1: G1: call extern .L2: G2: + .p2align 3 +.L3: +G3: .data .dword .L2-.L1 @@ -31,6 +35,14 @@ G2: .half G2-G1 .byte .L2-.L1 .byte G2-G1 +.dword .L3-.L2 +.dword G3-G2 +.word .L3-.L2 +.word G3-G2 +.half .L3-.L2 +.half G3-G2 +.byte .L3-.L2 +.byte G3-G2 # RELAX: .rela.data { # RELAX-NEXT: 0x0 R_RISCV_ADD64 .L2 0x0 # RELAX-NEXT: 0x0 R_RISCV_SUB64 .L1 0x0 @@ -48,4 +60,20 @@ G2: # RELAX-NEXT: 0x1C R_RISCV_SUB8 .L1 0x0 # RELAX-NEXT: 0x1D R_RISCV_ADD8 G2 0x0 # RELAX-NEXT: 0x1D R_RISCV_SUB8 G1 0x0 +# RELAX-NEXT: 0x1E R_RISCV_ADD64 .L3 0x0 +# RELAX-NEXT: 0x1E R_RISCV_SUB64 .L2 0x0 +# RELAX-NEXT: 0x26 R_RISCV_ADD64 G3 0x0 +# RELAX-NEXT: 0x26 R_RISCV_SUB64 G2 0x0 +# RELAX-NEXT: 0x2E R_RISCV_ADD32 .L3 0x0 +# RELAX-NEXT: 0x2E R_RISCV_SUB32 .L2 0x0 +# RELAX-NEXT: 0x32 R_RISCV_ADD32 G3 0x0 +# RELAX-NEXT: 0x32 R_RISCV_SUB32 G2 0x0 +# RELAX-NEXT: 0x36 R_RISCV_ADD16 .L3 0x0 +# RELAX-NEXT: 0x36 R_RISCV_SUB16 .L2 0x0 +# RELAX-NEXT: 0x38 R_RISCV_ADD16 G3 0x0 +# RELAX-NEXT: 0x38 R_RISCV_SUB16 G2 0x0 +# RELAX-NEXT: 0x3A R_RISCV_ADD8 .L3 0x0 +# RELAX-NEXT: 0x3A R_RISCV_SUB8 .L2 0x0 +# RELAX-NEXT: 0x3B R_RISCV_ADD8 G3 0x0 +# RELAX-NEXT: 0x3B R_RISCV_SUB8 G2 0x0 # RELAX-NEXT: } diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll index 0d90bc2..5ae559d 100644 --- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll +++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll @@ -316,12 +316,13 @@ define i1 @sub_nuw_neg_i16(i16 %a) { ; CHECK-LABEL: @sub_nuw_neg_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NEG2:%.*]] = sub nuw i16 [[A:%.*]], -305 -; CHECK-NEXT: br i1 false, label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] ; CHECK: exit.1: -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_2]] ; CHECK: exit.2: -; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 0 -; CHECK-NEXT: ret i1 [[C_3]] +; CHECK-NEXT: ret i1 true ; entry: %neg2 = sub nuw i16 %a, -305 @@ -379,3 +380,117 @@ entry: %c = icmp ugt i64 %neg2, 0 ret i1 %c } + +define i1 @pr76713(i16 %i1, i16 %i3) { +; CHECK-LABEL: @pr76713( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C1:%.*]] = icmp ult i16 [[I1:%.*]], -1 +; CHECK-NEXT: [[C2:%.*]] = icmp uge i16 [[I1]], -3 +; CHECK-NEXT: [[C3:%.*]] = icmp ult i16 [[I3:%.*]], 2 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: [[AND_2:%.*]] = and i1 [[AND]], [[C3]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i16 [[I1]], -3 +; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = mul nuw nsw i16 [[I3]], 4 +; CHECK-NEXT: [[I6:%.*]] = add nuw nsw i16 [[ARRAYIDX_IDX]], [[SUB]] +; CHECK-NEXT: [[C4:%.*]] = icmp ult i16 12, [[I6]] +; CHECK-NEXT: ret i1 [[C4]] +; CHECK: else: +; CHECK-NEXT: ret i1 false +; +entry: + %c1 = icmp ult i16 %i1, -1 + %c2 = icmp uge i16 %i1, -3 + %c3 = icmp ult i16 %i3, 2 + %and = and i1 %c1, %c2 + %and.2 = and i1 %and, %c3 + br i1 %and, label %then, label %else + +then: + %sub = sub nuw nsw i16 %i1, -3 + %arrayidx.idx = mul nuw nsw i16 %i3, 4 + %i6 = add nuw nsw i16 %arrayidx.idx, %sub + %c4 = icmp ult i16 12, %i6 + ret i1 %c4 + +else: + ret i1 0 +} + +define void @sub_nuw_chained_positive_constants(i16 %a) { +; CHECK-LABEL: @sub_nuw_chained_positive_constants( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB1:%.*]] = sub nuw i16 [[A:%.*]], 10 +; CHECK-NEXT: [[SUB2:%.*]] = sub nuw i16 [[SUB1]], 20 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 [[SUB2]], 90 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 121 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: ret void +; CHECK: exit.2: +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nuw i16 %a, 10 + %sub2 = sub nuw i16 %sub1, 20 + %c.1 = icmp ugt i16 %sub2, 90 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i16 %a, 120 + call void @use(i1 %c.2) + %c.3 = icmp ugt i16 %a, 121 + call void @use(i1 %c.3) + ret void + +exit.2: + %c.4 = icmp ugt i16 %a, 120 + call void @use(i1 %c.4) + %c.5 = icmp ugt i16 %a, 121 + call void @use(i1 %c.5) + ret void +} + +define void @sub_nuw_chained_negative_constants(i8 %a) { +; CHECK-LABEL: @sub_nuw_chained_negative_constants( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB1:%.*]] = sub nuw i8 [[A:%.*]], 10 +; CHECK-NEXT: [[SUB2:%.*]] = sub nuw i8 [[SUB1]], -126 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[SUB2]], 20 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i8 [[A]], -95 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: ret void +; CHECK: exit.2: +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nuw i8 %a, 10 + %sub2 = sub nuw i8 %sub1, 130 + %c.1 = icmp ugt i8 %sub2, 20 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i8 %a, 160 + call void @use(i1 %c.2) + %c.3 = icmp ugt i8 %a, 161 + call void @use(i1 %c.3) + ret void + + +exit.2: + %c.4 = icmp ugt i8 %a, 160 + call void @use(i1 %c.4) + %c.5 = icmp ugt i8 %a, 161 + call void @use(i1 %c.5) + ret void +} diff --git a/llvm/test/Transforms/InstCombine/and-xor-merge.ll b/llvm/test/Transforms/InstCombine/and-xor-merge.ll index 5433364..e6df4e3 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-merge.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-merge.ll @@ -40,3 +40,42 @@ define i32 @PR38781(i32 %a, i32 %b) { %and = and i32 %b.lobit.not, %a.lobit.not ret i32 %and } + +; (a ^ 4) & (a ^ ~4) -> 0 +define i32 @PR75692_1(i32 %x) { +; CHECK-LABEL: @PR75692_1 +; CHECK-NEXT: ret i32 0 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %x, -5 + %t4 = and i32 %t2, %t3 + ret i32 %t4 +} + +; (a ^ 4) & (a ^ 3) is not zero +define i32 @PR75692_2(i32 %x) { +; CHECK-LABEL: @PR75692_2 +; CHECK-NEXT: %t2 = xor i32 %x, 4 +; CHECK-NEXT: %t3 = xor i32 %x, -4 +; CHECK-NEXT: %t4 = and i32 %t2, %t3 +; CHECK-NEXT: ret i32 %t4 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %x, -4 + %t4 = and i32 %t2, %t3 + ret i32 %t4 +} + +; (a ^ 4) & (b ^ ~4) is not zero, since a != b is possible +define i32 @PR75692_3(i32 %x, i32 %y) { +; CHECK-LABEL: @PR75692_3 +; CHECK-NEXT: %t2 = xor i32 %x, 4 +; CHECK-NEXT: %t3 = xor i32 %y, -5 +; CHECK-NEXT: %t4 = and i32 %t2, %t3 +; CHECK-NEXT: ret i32 %t4 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %y, -5 + %t4 = and i32 %t2, %t3 + ret i32 %t4 +} diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll index 1d35332..361aab6 100644 --- a/llvm/test/Transforms/InstCombine/or-xor.ll +++ b/llvm/test/Transforms/InstCombine/or-xor.ll @@ -1055,3 +1055,42 @@ define i8 @or_nand_xor_common_op_commute3_use3(i8 %x, i8 %y, i8 %z) { %r = or i8 %xor, %nand ret i8 %r } + +; (a ^ 4) & (a ^ ~4) -> -1 +define i32 @PR75692_1(i32 %x) { +; CHECK-LABEL: @PR75692_1( +; CHECK-NEXT: ret i32 -1 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %x, -5 + %t4 = or i32 %t2, %t3 + ret i32 %t4 +} + +; (a ^ 4) & (a ^ 3) is not -1 +define i32 @PR75692_2(i32 %x) { +; CHECK-LABEL: @PR75692_2 +; CHECK-NEXT: %t2 = xor i32 %x, 4 +; CHECK-NEXT: %t3 = xor i32 %x, -4 +; CHECK-NEXT: %t4 = or i32 %t2, %t3 +; CHECK-NEXT: ret i32 %t4 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %x, -4 + %t4 = or i32 %t2, %t3 + ret i32 %t4 +} + +; (a ^ 4) & (b ^ ~4) is not -1, since a != b is possible +define i32 @PR75692_3(i32 %x, i32 %y) { +; CHECK-LABEL: @PR75692_3 +; CHECK-NEXT: %t2 = xor i32 %x, 4 +; CHECK-NEXT: %t3 = xor i32 %y, -5 +; CHECK-NEXT: %t4 = or i32 %t2, %t3 +; CHECK-NEXT: ret i32 %t4 +; + %t2 = xor i32 %x, 4 + %t3 = xor i32 %y, -5 + %t4 = or i32 %t2, %t3 + ret i32 %t4 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll index 3bccfac..b762c3a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather_extract_from_vectorbuild.ll @@ -31,3 +31,30 @@ loop: %i4 = extractelement <2 x float> %ins1, i64 0 br label %loop } + +define void @test1() { +; CHECK-LABEL: define void @test1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2]] = select <2 x i1> zeroinitializer, <2 x float> [[TMP1]], <2 x float> zeroinitializer +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %ph0 = phi float [ 0.000000e+00, %entry ], [ %i4, %loop ] + %ph1 = phi float [ 0.000000e+00, %entry ], [ %i5, %loop ] + %i = fadd float 0.000000e+00, %ph0 + %i1 = fadd float 0.000000e+00, %ph1 + %i2 = select i1 false, float %i, float 0.000000e+00 + %i3 = select i1 false, float %i1, float 0.000000e+00 + %ins0 = insertelement <2 x float> zeroinitializer, float %i2, i64 0 + %ins1 = insertelement <2 x float> %ins0, float %i3, i64 1 + %i4 = extractelement <2 x float> %ins1, i64 0 + %i5 = extractelement <2 x float> %ins1, i64 1 + br label %loop +} diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll new file mode 100644 index 0000000..bead0dc --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default-lookup-table.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -switch-range-to-icmp | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define i64 @test_1(i64 %0) { +; CHECK-LABEL: define i64 @test_1( +; CHECK-SAME: i64 [[TMP0:%.*]]) { +; CHECK-NEXT: switch.lookup: +; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], ptr @switch.table.test_1, i32 0, i64 [[TMP1]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8 +; CHECK-NEXT: ret i64 [[SWITCH_LOAD]] +; + %2 = urem i64 %0, 4 + switch i64 %2, label %5 [ + i64 1, label %3 + i64 2, label %3 + i64 3, label %4 + ] + +3: + br label %5 + +4: + br label %5 + +5: + %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ] + ret i64 %.0 +} + + +define i64 @test_2(i64 %0) { +; CHECK-LABEL: define i64 @test_2( +; CHECK-SAME: i64 [[TMP0:%.*]]) { +; CHECK-NEXT: switch.lookup: +; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: ret i64 [[TMP1]] +; + %2 = urem i64 %0, 4 + switch i64 %2, label %6 [ + i64 1, label %3 + i64 2, label %4 + i64 3, label %5 + ] + +3: + br label %6 + +4: + br label %6 + +5: + br label %6 + +6: + %.0 = phi i64 [ 0, %1 ], [ 1, %3 ], [ 2, %4 ], [ 3, %5 ] + ret i64 %.0 +} + diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll index 7c0d5e4..e30a535 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll @@ -79,15 +79,15 @@ default: ret void } -; This one is a negative test - we know the value of the default, -; but that's about it +; We can replace the default branch with case 3 since it is the only case that is missing. define void @test3(i2 %a) { ; CHECK-LABEL: define void @test3( ; CHECK-SAME: i2 [[A:%.*]]) { -; CHECK-NEXT: switch i2 [[A]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: switch i2 [[A]], label [[DOTUNREACHABLEDEFAULT:%.*]] [ ; CHECK-NEXT: i2 0, label [[CASE0:%.*]] ; CHECK-NEXT: i2 1, label [[CASE1:%.*]] ; CHECK-NEXT: i2 -2, label [[CASE2:%.*]] +; CHECK-NEXT: i2 -1, label [[DEFAULT:%.*]] ; CHECK-NEXT: ] ; CHECK: common.ret: ; CHECK-NEXT: ret void @@ -100,6 +100,8 @@ define void @test3(i2 %a) { ; CHECK: case2: ; CHECK-NEXT: call void @foo(i32 2) ; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK: .unreachabledefault: +; CHECK-NEXT: unreachable ; CHECK: default: ; CHECK-NEXT: call void @foo(i32 3) ; CHECK-NEXT: br label [[COMMON_RET]] @@ -122,6 +124,50 @@ default: ret void } +define void @test3_prof(i2 %a) { +; CHECK-LABEL: define void @test3_prof( +; CHECK-SAME: i2 [[A:%.*]]) { +; CHECK-NEXT: switch i2 [[A]], label [[DOTUNREACHABLEDEFAULT:%.*]] [ +; CHECK-NEXT: i2 0, label [[CASE0:%.*]] +; CHECK-NEXT: i2 1, label [[CASE1:%.*]] +; CHECK-NEXT: i2 -2, label [[CASE2:%.*]] +; CHECK-NEXT: i2 -1, label [[DEFAULT:%.*]] +; CHECK-NEXT: ], !prof [[PROF0:![0-9]+]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: case0: +; CHECK-NEXT: call void @foo(i32 0) +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: case1: +; CHECK-NEXT: call void @foo(i32 1) +; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK: case2: +; CHECK-NEXT: call void @foo(i32 2) +; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK: .unreachabledefault: +; CHECK-NEXT: unreachable +; CHECK: default: +; CHECK-NEXT: call void @foo(i32 3) +; CHECK-NEXT: br label [[COMMON_RET]] +; + switch i2 %a, label %default [i2 0, label %case0 + i2 1, label %case1 + i2 2, label %case2], !prof !0 + +case0: + call void @foo(i32 0) + ret void +case1: + call void @foo(i32 1) + ret void +case2: + call void @foo(i32 2) + ret void +default: + call void @foo(i32 3) + ret void +} + ; Negative test - check for possible overflow when computing ; number of possible cases. define void @test4(i128 %a) { @@ -267,3 +313,7 @@ default: declare void @llvm.assume(i1) +!0 = !{!"branch_weights", i32 8, i32 4, i32 2, i32 1} +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 0, i32 4, i32 2, i32 1, i32 8} +;. diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index a407986..67ca00b 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -65,6 +65,32 @@ define float @call_llvm.log10.f32(float %in) { } declare float @llvm.log10.f32(float) #0 + +; SVML: declare <2 x double> @__svml_sin2(<2 x double>) +; SVML: declare <4 x double> @__svml_sin4(<4 x double>) +; SVML: declare <8 x double> @__svml_sin8(<8 x double>) +; SVML: declare <4 x float> @__svml_log10f4(<4 x float>) +; SVML: declare <8 x float> @__svml_log10f8(<8 x float>) +; SVML: declare <16 x float> @__svml_log10f16(<16 x float>) + +; MASSV: declare <2 x double> @__sind2(<2 x double>) +; MASSV: declare <4 x float> @__log10f4(<4 x float>) + +; LIBMVEC-X86: declare <2 x double> @_ZGVbN2v_sin(<2 x double>) +; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_sin(<4 x double>) + +; ACCELERATE: declare <4 x float> @vlog10f(<4 x float>) + +; SLEEFGNUABI: declare <2 x double> @_ZGVnN2v_sin(<2 x double>) +; SLEEFGNUABI: declare <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double>, <vscale x 2 x i1>) +; SLEEFGNUABI: declare <4 x float> @_ZGVnN4v_log10f(<4 x float>) +; SLEEFGNUABI: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>) + +; ARMPL: declare <2 x double> @armpl_vsinq_f64(<2 x double>) +; ARMPL: declare <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double>, <vscale x 2 x i1>) +; ARMPL: declare <4 x float> @armpl_vlog10q_f32(<4 x float>) +; ARMPL: declare <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float>, <vscale x 4 x i1>) + attributes #0 = { nounwind readnone } ; SVML: attributes #[[SIN]] = { "vector-function-abi-variant"= diff --git a/llvm/test/tools/llvm-cxxfilt/no-params.test b/llvm/test/tools/llvm-cxxfilt/no-params.test deleted file mode 100644 index 17cbbbe..0000000 --- a/llvm/test/tools/llvm-cxxfilt/no-params.test +++ /dev/null @@ -1,34 +0,0 @@ -RUN: llvm-cxxfilt _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-PARAMS -RUN: llvm-cxxfilt -p _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-NO-PARAMS --match-full-lines -RUN: llvm-cxxfilt --no-params _Z3fooILZ3BarEET_f _Z3fooIPFcfEET_d _ZN1f2baC2ERKNS_2baIT_EE _Z3foov.123 | FileCheck %s --check-prefix=CHECK-NO-PARAMS --match-full-lines - -# Check that -p or --no-params flag omits function parameters and the return -# type. - -CHECK-PARAMS: Bar foo<Bar>(float) -CHECK-NO-PARAMS: foo<Bar> - -# Check that only the top-level function is impacted by the switch, and that -# nested function types in the encoding (e.g. where a function type is being -# used as a template parameter) still include their parameters. -# -# template <typename T> T foo(double); -# typedef char (*F)(float); -# F foo<F>(double) - -CHECK-PARAMS: char (*foo<char (*)(float)>(double))(float) -CHECK-NO-PARAMS: foo<char (*)(float)> - -# Use an invalid mangled name broken in the function parameters to check how -p -# or --no-params flag works. If the option is given we should be able to -# demangle the function name just fine. If it is not given, demangling will fail -# because of the invalid params. - -CHECK-PARAMS: _ZN1f2baC2ERKNS_2baIT_EE -CHECK-NO-PARAMS: f::ba::ba - -# Check that a vendor specific suffix is also omitted when --no-params is -# specified. This matches c++filt's behaviour. - -CHECK-PARAMS: foo() (.123) -CHECK-NO-PARAMS: foo diff --git a/llvm/tools/lli/ExecutionUtils.cpp b/llvm/tools/lli/ExecutionUtils.cpp index 55370ed..b6cc3bb 100644 --- a/llvm/tools/lli/ExecutionUtils.cpp +++ b/llvm/tools/lli/ExecutionUtils.cpp @@ -8,6 +8,7 @@ #include "ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" @@ -15,34 +16,6 @@ #include <cstdint> #include <vector> -// Declarations follow the GDB JIT interface (version 1, 2009) and must match -// those of the DYLD used for testing. See: -// -// llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp -// llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp -// -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - namespace llvm { template <typename... Ts> static void outsv(const char *Fmt, Ts &&...Vals) { @@ -61,6 +34,9 @@ static const char *actionFlagToStr(uint32_t ActionFlag) { return "<invalid action_flag>"; } +// Declarations follow the GDB JIT interface (version 1, 2009) and must match +// those of the DYLD used for testing. +// // Sample output: // // Reading __jit_debug_descriptor at 0x0000000000404048 diff --git a/llvm/tools/llvm-cxxfilt/Opts.td b/llvm/tools/llvm-cxxfilt/Opts.td index 034cb26..f652a1a 100644 --- a/llvm/tools/llvm-cxxfilt/Opts.td +++ b/llvm/tools/llvm-cxxfilt/Opts.td @@ -17,7 +17,6 @@ multiclass Eq<string name, string help> { def help : FF<"help", "Display this help">; defm strip_underscore : BB<"strip-underscore", "Strip the leading underscore", "Don't strip the leading underscore">; def types : FF<"types", "Attempt to demangle types as well as function names">; -def no_params : FF<"no-params", "Skip function parameters and return types">; def version : FF<"version", "Display the version">; defm : Eq<"format", "Specify mangling format. Currently ignored because only 'gnu' is supported">; @@ -26,5 +25,4 @@ def : F<"s", "Alias for --format">; def : F<"_", "Alias for --strip-underscore">, Alias<strip_underscore>; def : F<"h", "Alias for --help">, Alias<help>; def : F<"n", "Alias for --no-strip-underscore">, Alias<no_strip_underscore>; -def : F<"p", "Alias for --no-params">, Alias<no_params>; def : F<"t", "Alias for --types">, Alias<types>; diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp index 26a1f2f..4b9d88a 100644 --- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp +++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp @@ -54,7 +54,6 @@ public: }; } // namespace -static bool ParseParams; static bool StripUnderscore; static bool Types; @@ -75,19 +74,18 @@ static std::string demangle(const std::string &Mangled) { } std::string Result; - if (nonMicrosoftDemangle(DecoratedStr, Result, CanHaveLeadingDot, - ParseParams)) + if (nonMicrosoftDemangle(DecoratedStr, Result, CanHaveLeadingDot)) return Result; std::string Prefix; char *Undecorated = nullptr; if (Types) - Undecorated = itaniumDemangle(DecoratedStr, ParseParams); + Undecorated = itaniumDemangle(DecoratedStr); if (!Undecorated && starts_with(DecoratedStr, "__imp_")) { Prefix = "import thunk for "; - Undecorated = itaniumDemangle(DecoratedStr.substr(6), ParseParams); + Undecorated = itaniumDemangle(DecoratedStr.substr(6)); } Result = Undecorated ? Prefix + Undecorated : Mangled; @@ -175,8 +173,6 @@ int llvm_cxxfilt_main(int argc, char **argv, const llvm::ToolContext &) { else StripUnderscore = Triple(sys::getProcessTriple()).isOSBinFormatMachO(); - ParseParams = !Args.hasArg(OPT_no_params); - Types = Args.hasArg(OPT_types); std::vector<std::string> Decorated = Args.getAllArgValues(OPT_INPUT); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-executor/llvm-jitlink-executor.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-executor/llvm-jitlink-executor.cpp index 71c83f2..20205ee 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-executor/llvm-jitlink-executor.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-executor/llvm-jitlink-executor.cpp @@ -54,9 +54,9 @@ void printErrorAndExit(Twine ErrMsg) { errs() << "error: " << ErrMsg.str() << "\n\n" << "Usage:\n" << " llvm-jitlink-executor " << DebugOption - << "filedescs=<infd>,<outfd> [args...]\n" + << "[test-jitloadergdb] filedescs=<infd>,<outfd> [args...]\n" << " llvm-jitlink-executor " << DebugOption - << "listen=<host>:<port> [args...]\n"; + << "[test-jitloadergdb] listen=<host>:<port> [args...]\n"; exit(1); } @@ -112,6 +112,17 @@ int openListener(std::string Host, std::string PortStr) { #endif // LLVM_ON_UNIX } +// JITLink debug support plugins put information about JITed code in this GDB +// JIT Interface global from OrcTargetProcess. +extern "C" struct jit_descriptor __jit_debug_descriptor; + +static void *findLastDebugDescriptorEntryPtr() { + struct jit_code_entry *Last = __jit_debug_descriptor.first_entry; + while (Last && Last->next_entry) + Last = Last->next_entry; + return Last; +} + int main(int argc, char *argv[]) { #if LLVM_ENABLE_THREADS @@ -123,39 +134,46 @@ int main(int argc, char *argv[]) { if (argc < 2) printErrorAndExit("insufficient arguments"); - else { - StringRef ConnectArg = argv[FirstProgramArg++]; + StringRef NextArg = argv[FirstProgramArg++]; #ifndef NDEBUG - if (ConnectArg == "debug") { - DebugFlag = true; - ConnectArg = argv[FirstProgramArg++]; - } + if (NextArg == "debug") { + DebugFlag = true; + NextArg = argv[FirstProgramArg++]; + } #endif - StringRef SpecifierType, Specifier; - std::tie(SpecifierType, Specifier) = ConnectArg.split('='); - if (SpecifierType == "filedescs") { - StringRef FD1Str, FD2Str; - std::tie(FD1Str, FD2Str) = Specifier.split(','); - if (FD1Str.getAsInteger(10, InFD)) - printErrorAndExit(FD1Str + " is not a valid file descriptor"); - if (FD2Str.getAsInteger(10, OutFD)) - printErrorAndExit(FD2Str + " is not a valid file descriptor"); - } else if (SpecifierType == "listen") { - StringRef Host, PortStr; - std::tie(Host, PortStr) = Specifier.split(':'); - - int Port = 0; - if (PortStr.getAsInteger(10, Port)) - printErrorAndExit("port number '" + PortStr + - "' is not a valid integer"); - - InFD = OutFD = openListener(Host.str(), PortStr.str()); - } else - printErrorAndExit("invalid specifier type \"" + SpecifierType + "\""); + std::vector<StringRef> TestOutputFlags; + while (NextArg.starts_with("test-")) { + TestOutputFlags.push_back(NextArg); + NextArg = argv[FirstProgramArg++]; } + if (llvm::is_contained(TestOutputFlags, "test-jitloadergdb")) + fprintf(stderr, "__jit_debug_descriptor.last_entry = 0x%016" PRIx64 "\n", + pointerToJITTargetAddress(findLastDebugDescriptorEntryPtr())); + + StringRef SpecifierType, Specifier; + std::tie(SpecifierType, Specifier) = NextArg.split('='); + if (SpecifierType == "filedescs") { + StringRef FD1Str, FD2Str; + std::tie(FD1Str, FD2Str) = Specifier.split(','); + if (FD1Str.getAsInteger(10, InFD)) + printErrorAndExit(FD1Str + " is not a valid file descriptor"); + if (FD2Str.getAsInteger(10, OutFD)) + printErrorAndExit(FD2Str + " is not a valid file descriptor"); + } else if (SpecifierType == "listen") { + StringRef Host, PortStr; + std::tie(Host, PortStr) = Specifier.split(':'); + + int Port = 0; + if (PortStr.getAsInteger(10, Port)) + printErrorAndExit("port number '" + PortStr + "' is not a valid integer"); + + InFD = OutFD = openListener(Host.str(), PortStr.str()); + } else + printErrorAndExit("invalid specifier type \"" + SpecifierType + "\""); + auto Server = ExitOnErr(SimpleRemoteEPCServer::Create<FDSimpleRemoteEPCTransport>( [](SimpleRemoteEPCServer::Setup &S) -> Error { @@ -173,6 +191,11 @@ int main(int argc, char *argv[]) { InFD, OutFD)); ExitOnErr(Server->waitForDisconnect()); + + if (llvm::is_contained(TestOutputFlags, "test-jitloadergdb")) + fprintf(stderr, "__jit_debug_descriptor.last_entry = 0x%016" PRIx64 "\n", + pointerToJITTargetAddress(findLastDebugDescriptorEntryPtr())); + return 0; #else diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 193a281..4bd4722 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -108,20 +108,12 @@ getInterfaceFile(const StringRef Filename, bool ResetBanner = true) { LLVM_FALLTHROUGH; case file_magic::macho_dynamically_linked_shared_lib_stub: LLVM_FALLTHROUGH; - case file_magic::macho_universal_binary: { - auto IFOrErr = DylibReader::get(Buffer->getMemBufferRef()); - if (!IFOrErr) - ExitOnErr(IFOrErr.takeError()); - IF = std::move(*IFOrErr); + case file_magic::macho_universal_binary: + IF = ExitOnErr(DylibReader::get(Buffer->getMemBufferRef())); break; - } - case file_magic::tapi_file: { - auto IFOrErr = TextAPIReader::get(Buffer->getMemBufferRef()); - if (!IFOrErr) - ExitOnErr(IFOrErr.takeError()); - IF = std::move(*IFOrErr); + case file_magic::tapi_file: + IF = ExitOnErr(TextAPIReader::get(Buffer->getMemBufferRef())); break; - } default: reportError(Filename + ": unsupported file type"); } @@ -170,10 +162,7 @@ static bool handleMergeAction(const Context &Ctx) { Out = std::move(IF); continue; } - auto ResultIF = Out->merge(IF.get()); - if (!ResultIF) - ExitOnErr(ResultIF.takeError()); - Out = std::move(ResultIF.get()); + Out = ExitOnErr(Out->merge(IF.get())); } return handleWriteAction(Ctx, std::move(Out)); } diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 3fb10db..d7876b7 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -3656,12 +3656,14 @@ TEST_F(AArch64GISelMITest, CreateLibcall) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; + LostDebugLocObserver DummyLocObserver(""); LLVMContext &Ctx = MF->getFunction().getContext(); auto *RetTy = Type::getVoidTy(Ctx); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - createLibcall(B, "abort", {{}, RetTy, 0}, {}, CallingConv::C)); + createLibcall(B, "abort", {{}, RetTy, 0}, {}, CallingConv::C, + DummyLocObserver, nullptr)); auto CheckStr = R"( CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp diff --git a/llvm/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp b/llvm/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp index 12f4263..061db56 100644 --- a/llvm/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp @@ -512,35 +512,9 @@ TEST_F(OrcCAPITestBase, AddObjectBuffer) { ASSERT_TRUE(!!SumAddr); } -// This must be kept in sync with gdb/gdb/jit.h . -extern "C" { - -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - -// We put information about the JITed function in this global, which the -// debugger reads. Make sure to specify the version statically, because the -// debugger checks the version before we can set it during runtime. -extern struct jit_descriptor __jit_debug_descriptor; +// JITLink debug support plugins put information about JITed code in this GDB +// JIT Interface global from OrcTargetProcess. +extern "C" struct jit_descriptor __jit_debug_descriptor; static void *findLastDebugDescriptorEntryPtr() { struct jit_code_entry *Last = __jit_debug_descriptor.first_entry; @@ -548,7 +522,6 @@ static void *findLastDebugDescriptorEntryPtr() { Last = Last->next_entry; return Last; } -} #if defined(_AIX) or not(defined(__ELF__) or defined(__MACH__)) TEST_F(OrcCAPITestBase, DISABLED_EnableDebugSupport) { |