aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
authorYaxun (Sam) Liu <yaxun.liu@amd.com>2024-02-22 13:51:31 -0500
committerGitHub <noreply@github.com>2024-02-22 13:51:31 -0500
commit33a6ce18373ffd1457ebd54e930b6f02fe4c39c1 (patch)
tree71726a5abccad007b064df6fa74429de0762744f /clang/lib/CodeGen/CodeGenModule.cpp
parentcc839275164a7768451531af868fa70eb9e71cbd (diff)
downloadllvm-33a6ce18373ffd1457ebd54e930b6f02fe4c39c1.zip
llvm-33a6ce18373ffd1457ebd54e930b6f02fe4c39c1.tar.gz
llvm-33a6ce18373ffd1457ebd54e930b6f02fe4c39c1.tar.bz2
[HIP] Allow partial linking for `-fgpu-rdc` (#81700)
`-fgpu-rdc` mode allows device functions call device functions in different TU. However, currently all device objects have to be linked together since only one fat binary is supported. This is time consuming for AMDGPU backend since it only supports LTO. There are use cases that objects can be divided into groups in which device functions are self-contained but host functions are not. It is desirable to link/optimize/codegen the device code and generate a fatbin for each group, whereas partially link the host code with `ld -r` or generate a static library by using the `--emit-static-lib` option of clang. This avoids linking all device code together, therefore decreases the linking time for `-fgpu-rdc`. Previously, clang emits an external symbol `__hip_fatbin` for all objects for `-fgpu-rdc`. With this patch, clang emits an unique external symbol `__hip_fatbin_{cuid}` for the fat binary for each object. When a group of objects are linked together to generate a fatbin, the symbols are merged by alias and point to the same fat binary. Each group has its own fat binary. One executable or shared library can have multiple fat binaries. Device linking is done for undefined fab binary symbols only to avoid repeated linking. `__hip_gpubin_handle` is also uniquefied and merged to avoid repeated registering. Symbol `__hip_cuid_{cuid}` is introduced to facilitate debugging and tooling. Fixes: https://github.com/llvm/llvm-project/issues/77018
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp10
1 files changed, 9 insertions, 1 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 77fb3a6..95e457b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -915,7 +915,15 @@ void CodeGenModule::Release() {
llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external");
addCompilerUsedGlobal(GV);
}
-
+ if (LangOpts.HIP) {
+ // Emit a unique ID so that host and device binaries from the same
+ // compilation unit can be associated.
+ auto *GV = new llvm::GlobalVariable(
+ getModule(), Int8Ty, false, llvm::GlobalValue::ExternalLinkage,
+ llvm::Constant::getNullValue(Int8Ty),
+ "__hip_cuid_" + getContext().getCUIDHash());
+ addCompilerUsedGlobal(GV);
+ }
emitLLVMUsed();
if (SanStats)
SanStats->finish();