aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYaxun (Sam) Liu <yaxun.liu@amd.com>2020-06-09 15:02:53 -0400
committerYaxun (Sam) Liu <yaxun.liu@amd.com>2020-06-15 18:55:01 -0400
commite8090d83fd92cc6a9d03b48c91f979bbfa6bb1fc (patch)
tree0046fea36b48832bc4031e1bd8b23b2b8ad6c905
parent64ec505dd46832880350e8b45316eaed30b458dc (diff)
downloadllvm-e8090d83fd92cc6a9d03b48c91f979bbfa6bb1fc.zip
llvm-e8090d83fd92cc6a9d03b48c91f979bbfa6bb1fc.tar.gz
llvm-e8090d83fd92cc6a9d03b48c91f979bbfa6bb1fc.tar.bz2
[HIP] Do not call opt/llc for -fno-gpu-rdc
Currently HIP toolchain calls clang to emit bitcode then calls opt/llc for device compilation for the default -fno-gpu-rdc case, which is unnecessary since clang is able to compile a single source file to ISA. This patch fixes the HIP action builder and toolchain so that the default -fno-gpu-rdc can be done like a canonical toolchain, i.e. one clang -cc1 invocation to compile source code to ISA. This can avoid unnecessary processes to speed up the compilation, and avoid redundant LLVM passes which are performed in clang -cc1 and opt. Differential Revision: https://reviews.llvm.org/D81627
-rw-r--r--clang/lib/Driver/Driver.cpp9
-rw-r--r--clang/lib/Driver/ToolChains/HIP.cpp5
-rw-r--r--clang/test/Driver/cuda-phases.cu217
-rw-r--r--clang/test/Driver/hip-binding.hip18
-rw-r--r--clang/test/Driver/hip-device-compile.hip8
-rw-r--r--clang/test/Driver/hip-phases.hip214
-rw-r--r--clang/test/Driver/hip-save-temps.hip17
-rw-r--r--clang/test/Driver/hip-toolchain-mllvm.hip26
-rw-r--r--clang/test/Driver/hip-toolchain-no-rdc.hip72
-rw-r--r--clang/test/Driver/hip-toolchain-opt.hip22
10 files changed, 357 insertions, 251 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 19faf09..33c4a07 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2722,10 +2722,15 @@ class OffloadingActionBuilder final {
// a fat binary containing all the code objects for different GPU's.
// The fat binary is then an input to the host action.
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+ auto BackendAction = C.getDriver().ConstructPhaseAction(
+ C, Args, phases::Backend, CudaDeviceActions[I],
+ AssociatedOffloadKind);
+ auto AssembleAction = C.getDriver().ConstructPhaseAction(
+ C, Args, phases::Assemble, BackendAction, AssociatedOffloadKind);
// Create a link action to link device IR with device library
// and generate ISA.
ActionList AL;
- AL.push_back(CudaDeviceActions[I]);
+ AL.push_back(AssembleAction);
CudaDeviceActions[I] =
C.MakeAction<LinkJobAction>(AL, types::TY_Image);
@@ -2779,7 +2784,7 @@ class OffloadingActionBuilder final {
}
// By default, we produce an action for each device arch.
- if (!Relocatable || CurPhase <= phases::Backend) {
+ if (!Relocatable || CurPhase < phases::Backend || CompileDeviceOnly) {
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
AssociatedOffloadKind);
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 7e58fe7..af81100 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -243,6 +243,11 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (JA.getType() == types::TY_HIP_FATBIN)
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
+ assert(Inputs.size());
+ if (Inputs.size() == 1 && Inputs[0].getType() == types::TY_Object)
+ return constructLldCommand(C, JA, Inputs, Output, Args,
+ Inputs[0].getFilename());
+
assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
"Unsupported target");
diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu
index acbf345..8d5c547 100644
--- a/clang/test/Driver/cuda-phases.cu
+++ b/clang/test/Driver/cuda-phases.cu
@@ -7,71 +7,41 @@
// REQUIRES: clang-driver
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
-// REQUIRES: amdgpu-registered-target
//
// Test single gpu architecture with complete compilation.
//
// Test CUDA NVPTX phases.
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN,BIN_NV %s
+// RUN: | FileCheck -check-prefixes=BIN %s
//
-// Test HIP AMDGPU -fgpu-rdc phases.
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_RDC %s
-//
-// Test HIP AMDGPU -fno-gpu-rdc phases (default).
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_NRDC %s
-//
-// BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
-// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])
-// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]])
+// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])
// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
-// BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
-// BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
-// BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object
-// BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler
-// BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])
-// BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir
-// BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
-// BIN_AMD_RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
-// BIN_AMD_NRDC-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]])
-// BIN_AMD_NRDC-DAG: [[P7:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image
-// BIN_AMD_NRDC-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-hip)
-// BIN_AMD_NRDC-DAG: [[P11:[0-9]+]]: offload, "host-hip (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P8]]}, ir
-// BIN_AMD_NRDC-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
+// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
+// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
+// BIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object
+// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler
+// BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])
+// BIN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir
+// BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
-// BIN_AMD_RDC-DAG: [[P15:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
-// BIN_AMD_RDC-DAG: [[P16:[0-9]+]]: linker, {[[P15]]}, image, (device-[[T]], [[ARCH]])
-// BIN_AMD_RDC-DAG: [[P17:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]},
-// BIN_AMD_RDC-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P16]]}, object
//
// Test single gpu architecture up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=ASM,ASM_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 -fcuda-rdc %s -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=ASM,ASM_AMD %s
-// ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
-// ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RUN: | FileCheck -check-prefixes=ASM %s
+// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
-// ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
+// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// ASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
@@ -82,62 +52,46 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,BIN2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,BIN2_AMD %s
-// BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// RUN: | FileCheck -check-prefixes=BIN2 %s
+// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
-// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30|gfx803]])
+// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30]])
// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
-// BIN2_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
-// BIN2_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
-// BIN2_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object
-// BIN2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler
-// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
+// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object
+// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler
+// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
-// BIN2_NV-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
-// BIN2_NV-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
-// BIN2_NV-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object
-// BIN2_NV-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler
-// BIN2_NV-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]])
-// BIN2_NV-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir
-// BIN2_NV-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
-// BIN2_AMD-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object
+// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler
+// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]])
+// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir
+// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
// BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
-// BIN2_AMD-DAG: [[P22:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
-// BIN2_AMD-DAG: [[P23:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
-// BIN2_AMD-DAG: [[P24:[0-9]+]]: linker, {[[P22]]}, image, (device-[[T]], [[ARCH1]])
-// BIN2_AMD-DAG: [[P25:[0-9]+]]: linker, {[[P23]]}, image, (device-[[T]], [[ARCH2]])
-// BIN2_AMD-DAG: [[P26:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P21]]},
-// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P24]]},
-// BIN2_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH2]])" {[[P25]]}, object
//
// Test two gpu architecturess up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=ASM2,ASM2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=ASM2,ASM2_AMD %s
-// ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
-// ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
+// RUN: | FileCheck -check-prefixes=ASM2 %s
+// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
-// ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])
-// ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler
-// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
+// ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])
+// ASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P3]]}, assembler
+// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
-// ASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
-// ASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
+// ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
+// ASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
// ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
// ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
// ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
@@ -149,12 +103,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=HBIN,HBIN_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=HBIN,HBIN_AMD %s
-// HBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// HBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// RUN: | FileCheck -check-prefixes=HBIN %s
+// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
@@ -167,12 +117,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=HASM,HASM_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=HASM,HASM_AMD %s
-// HASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// HASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// RUN: | FileCheck -check-prefixes=HASM %s
+// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
@@ -184,12 +130,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=HBIN2,HBIN2_AMD %s
-// HBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// HBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// RUN: | FileCheck -check-prefixes=HBIN2 %s
+// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
@@ -203,12 +145,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \
-// RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
-// RUN: 2>&1 | FileCheck -check-prefixes=HASM2,HASM2_AMD %s
-// HASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
-// HASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
+// RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
+// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
@@ -220,17 +158,13 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=DBIN,DBIN_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=DBIN,DBIN_AMD %s
-// DBIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
-// DBIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RUN: | FileCheck -check-prefixes=DBIN %s
+// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
-// DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
-// DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
+// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
// DBIN-NOT: host
//
// Test single gpu architecture up to the assemble phase in device-only
@@ -238,16 +172,12 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=DASM,DASM_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=DASM,DASM_AMD %s
-// DASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
-// DASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RUN: | FileCheck -check-prefixes=DASM %s
+// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
-// DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
+// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
// DASM-NOT: host
//
@@ -256,23 +186,19 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
-// RUN: | FileCheck -check-prefixes=DBIN2,DBIN2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
-// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2,DBIN2_AMD %s
-// DBIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
-// DBIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RUN: | FileCheck -check-prefixes=DBIN2 %s
+// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DBIN2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
-// DBIN2_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
-// DBIN2_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object
-// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
+// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object
+// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
-// DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
-// DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
-// DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
+// DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
// DBIN2-NOT: host
//
// Test two gpu architectures up to the assemble phase in device-only
@@ -280,20 +206,15 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \
-// RUN: 2>&1 | FileCheck -check-prefixes=DASM2,DASM2_NV %s
-// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu \
-// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
-// RUN: --cuda-device-only -S 2>&1 \
-// RUN: | FileCheck -check-prefixes=DASM2,DASM2_AMD %s
-// DASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
-// DASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RUN: 2>&1 | FileCheck -check-prefixes=DASM2 %s
+// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
-// DASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
-// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
+// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
+// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
-// DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
-// DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
+// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
+// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
// DASM2-NOT: host
diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip
index d173eda..53fe9af 100644
--- a/clang/test/Driver/hip-binding.hip
+++ b/clang/test/Driver/hip-binding.hip
@@ -2,6 +2,24 @@
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
+// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s
+// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBIN:.*]]"
+// NRDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]", "[[FATBIN]]"], output: "{{.*}}"
+
+// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS %s
+// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]"
+// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]"
+// RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]"
+// RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", "[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}"
+
// RUN: touch %t.o
// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
diff --git a/clang/test/Driver/hip-device-compile.hip b/clang/test/Driver/hip-device-compile.hip
index c442d23..fdae3a8 100644
--- a/clang/test/Driver/hip-device-compile.hip
+++ b/clang/test/Driver/hip-device-compile.hip
@@ -63,10 +63,10 @@
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=BUNDLE %s
-// BUNDLE: {{"*.clang.*"}}
-// BUNDLE: {{"*.llvm-link"}}
-// BUNDLE: {{".*opt"}}
-// BUNDLE: {{".*llc"}}
+// BUNDLE: {{"*.clang.*"}} {{.*}} "-emit-obj"
+// BUNDLE-NOT: {{"*.llvm-link"}}
+// BUNDLE-NOT: {{".*opt"}}
+// BUNDLE-NOT: {{".*llc"}}
// BUNDLE: {{".*lld.*"}}
// BUNDLE: {{".*clang-offload-bundler"}}
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
new file mode 100644
index 0000000..f14e8c1
--- /dev/null
+++ b/clang/test/Driver/hip-phases.hip
@@ -0,0 +1,214 @@
+// Tests the phases generated for a CUDA offloading target for different
+// combinations of:
+// - Number of gpu architectures;
+// - Host/device-only compilation;
+// - User-requested final phase - binary or assembly.
+
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+//
+// Test single gpu architecture with complete compilation.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=BIN,NRD %s
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=BIN,RDC %s
+//
+// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+
+// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
+// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
+// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
+// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
+// NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-hip, [[ARCH]])
+// NRD-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
+// NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-hip)
+// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]])
+
+// NRD-DAG: [[P12:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P10]]}, ir
+
+// NRD-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
+// RDC-DAG: [[P13:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// BIN-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (host-[[T]])
+// BIN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (host-[[T]])
+// RDC-DAG: [[P16:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P15]]}, "device-hip (amdgcn-amd-amdhsa:gfx803)" {[[P10]]}, image
+//
+// Test single gpu architecture up to the assemble phase.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=ASM %s
+// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+
+// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
+// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
+// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
+// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
+
+//
+// Test two gpu architectures with complete compilation.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=BIN2 %s
+
+// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+
+// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
+// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
+// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
+
+// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
+// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
+
+// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-hip)
+
+// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-hip (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P17]]}, ir
+// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
+// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
+// BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
+
+//
+// Test two gpu architecturess up to the assemble phase.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=ASM2 %s
+// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
+// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
+// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
+// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
+// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
+// ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
+// ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
+// ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
+// ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
+
+//
+// Test single gpu architecture with complete compilation in host-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
+// RUN: | FileCheck -check-prefixes=HBIN %s
+// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
+// HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN-NOT: device
+//
+// Test single gpu architecture up to the assemble phase in host-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=HASM %s
+// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM-NOT: device
+
+//
+// Test two gpu architectures with complete compilation in host-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
+// RUN: | FileCheck -check-prefixes=HBIN2 %s
+// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
+// HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN2-NOT: device
+
+//
+// Test two gpu architectures up to the assemble phase in host-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
+// RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
+// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM2-NOT: device
+
+//
+// Test single gpu architecture with complete compilation in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
+// RUN: | FileCheck -check-prefixes=DBIN %s
+// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DBIN-NOT: host
+//
+// Test single gpu architecture up to the assemble phase in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=DASM %s
+// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DASM-NOT: host
+
+//
+// Test two gpu architectures with complete compilation in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
+// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
+// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
+// DBIN2-NOT: host
+//
+// Test two gpu architectures up to the assemble phase in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip -target x86_64-unknown-linux-gnu \
+// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: --cuda-device-only -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=DASM2 %s
+// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
+// DASM2-NOT: host
diff --git a/clang/test/Driver/hip-save-temps.hip b/clang/test/Driver/hip-save-temps.hip
index e9a8bd1..2da9386 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -23,15 +23,22 @@
// RUN: FileCheck -check-prefixes=CHECK,RDC,RDC-WOUT,WOUT %s
// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui"
-// CHECK: {{.*}}llvm-link{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900-linked.bc"
-// CHECK: {{.*}}opt{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900-optimized.bc"
-// CHECK: {{.*}}llc{{.*}}"-filetype=asm"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s"
-// CHECK: {{.*}}llc{{.*}}"-filetype=obj"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o"
+// NORDC: {{".*clang.*"}} {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
+// NORDC: {{".*clang.*"}} {{.*}} "-S" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s"
+// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o"
+// NORDC-NOT: {{.*}}llvm-link
+// NORDC-NOT: {{.*}}opt
+// NORDC-NOT: {{.*}}llc
+// RDC: {{.*}}llvm-link{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-linked.bc"
+// RDC: {{.*}}opt{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp-optimized.bc"
+// RDC: {{.*}}llc{{.*}}"-filetype=asm"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.s"
+// RDC: {{.*}}llc{{.*}}"-filetype=obj"{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.o"
// NORDC: {{.*}}lld{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out"
// RDC: {{.*}}lld{{.*}}"-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900"
// NORDC: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb"
// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
-// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
+// NORDC: {{.*}}clang{{.*}}"-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
+// RDC: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s"
// CHECK: {{.*}}clang{{.*}}"-o" "hip-save-temps{{.*}}.o"
// RDC-NOUT: {{.*}}clang-offload-bundler{{.*}}"-outputs=a.out.hipfb"
diff --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip
index 692a9a7..66556d0 100644
--- a/clang/test/Driver/hip-toolchain-mllvm.hip
+++ b/clang/test/Driver/hip-toolchain-mllvm.hip
@@ -9,32 +9,18 @@
// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-target-cpu" "gfx803"
// CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}}
-// CHECK: [[OPT:".*opt"]] {{".*-gfx803-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx803" "-amdgpu-function-calls=0"
-// CHECK-SAME: "-o" [[OPT_803_BC:".*-gfx803-optimized.*bc"]]
-
-// CHECK: [[LLC: ".*llc"]] [[OPT_803_BC]]
-// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: {{.*}} "-mcpu=gfx803"
-// CHECK-SAME: "-filetype=obj"
-// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx803-.*o"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-target-cpu" "gfx900"
// CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}}
-// CHECK: [[OPT]] {{".*-gfx900-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx900" "-amdgpu-function-calls=0"
-// CHECK-SAME: "-o" [[OPT_900_BC:".*-gfx900-optimized.*bc"]]
-
-// CHECK: [[LLC]] [[OPT_900_BC]]
-// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: {{.*}} "-mcpu=gfx900"
-// CHECk-SAME: "-filetype=obj"
-// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx900-.*o"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index fd5cf1b..6e212b6 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -18,25 +18,17 @@
// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
-// CHECK-SAME: {{.*}} "-o" [[A_BC_803:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
-// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_803]]
-// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_803:".*-gfx803-linked-.*bc"]]
-
-// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx803"
-// CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".*-gfx803-optimized.*bc"]]
-
-// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx803"
-// CHECK-SAME: "-filetype=obj"
-// CHECK-SAME: "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]]
+// CHECK-NOT: {{".*llvm-link"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared"
// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
@@ -47,25 +39,17 @@
// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
-// CHECK-SAME: {{.*}} "-o" [[A_BC_900:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[A_SRC]]
-// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_900]]
-// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_900:".*-gfx900-linked-.*bc"]]
-
-// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx900"
-// CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".*-gfx900-optimized.*bc"]]
-
-// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx900"
-// CHECK-SAME: "-filetype=obj"
-// CHECK-SAME: "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]]
+// CHECK-NOT: {{".*llvm-link"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
@@ -92,25 +76,17 @@
// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC_803:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_803:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
-// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_803]]
-// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_803:".*-gfx803-linked-.*bc"]]
-
-// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx803"
-// CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".*-gfx803-optimized.*bc"]]
-
-// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx803"
-// CHECK-SAME: "-filetype=obj"
-// CHECK-SAME: "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]]
+// CHECK-NOT: {{".*llvm-link"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
// CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]]
@@ -121,25 +97,17 @@
// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
// CHECK-SAME: "-fapply-global-visibility-to-externs"
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC_900:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_B_900:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[B_SRC]]
-// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_900]]
-// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_900:".*-gfx900-linked-.*bc"]]
-
-// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-mcpu=gfx900"
-// CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".*-gfx900-optimized.*bc"]]
-
-// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECk-SAME: "-mcpu=gfx900"
-// CHECK-SAME: "-filetype=obj"
-// CHECK-SAME: "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]]
+// CHECK-NOT: {{".*llvm-link"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
// CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]]
diff --git a/clang/test/Driver/hip-toolchain-opt.hip b/clang/test/Driver/hip-toolchain-opt.hip
index c6d6bf5..4a470db 100644
--- a/clang/test/Driver/hip-toolchain-opt.hip
+++ b/clang/test/Driver/hip-toolchain-opt.hip
@@ -68,27 +68,9 @@
// Oz-SAME: "-Oz"
// Og-SAME: "-Og"
-// ALL: "{{.*}}opt"
-// DEFAULT-NOT: "-O{{.}}"
-// O0-SAME: "-O0"
-// O1-SAME: "-O1"
-// O2-SAME: "-O2"
-// O3-SAME: "-O3"
-// Os-SAME: "-Os"
-// Oz-SAME: "-Oz"
-// Og-SAME: "-O1"
-// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// ALL-NOT: "{{.*}}opt"
-// ALL: "{{.*}}llc"
-// DEFAULT-NOT: "-O{{.}}"
-// O0-SAME: "-O0"
-// O1-SAME: "-O1"
-// O2-SAME: "-O2"
-// O3-SAME: "-O3"
-// Os-SAME: "-O2"
-// Oz-SAME: "-O2"
-// Og-SAME: "-O1"
-// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// ALL-NOT: "{{.*}}llc"
// ALL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// DEFAULT-NOT: "-O{{.}}"