diff options
author | Joseph Huber <huberjn@outlook.com> | 2023-12-19 20:01:15 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-19 20:01:15 -0600 |
commit | deab58d1277515d9e2e67d0d8dfd2f2a9f7c6e17 (patch) | |
tree | a06470004a6cc8053ffe43575749c42e207f1c38 /llvm/lib/Object/ELFObjectFile.cpp | |
parent | 56414220dfeb274a15beb55ab3da757978a0255f (diff) | |
download | llvm-deab58d1277515d9e2e67d0d8dfd2f2a9f7c6e17.zip llvm-deab58d1277515d9e2e67d0d8dfd2f2a9f7c6e17.tar.gz llvm-deab58d1277515d9e2e67d0d8dfd2f2a9f7c6e17.tar.bz2 |
[ELF] Add CPU name detection for CUDA architectures (#75964)
Summary:
Recently we added support for detecting the CUDA processor with the ELF
flags. This allows us to get a string representation of it in other
code. This will be used by the offloading runtime.
Diffstat (limited to 'llvm/lib/Object/ELFObjectFile.cpp')
-rw-r--r-- | llvm/lib/Object/ELFObjectFile.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 3c86b0f..95c4f9f 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
   switch (getEMachine()) {
   case ELF::EM_AMDGPU:
     return getAMDGPUCPUName();
+  case ELF::EM_CUDA:
+    return getNVPTXCPUName();
   case ELF::EM_PPC:
   case ELF::EM_PPC64:
     return StringRef("future");
@@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
   }
 }
 
+StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+  assert(getEMachine() == ELF::EM_CUDA);
+  unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
+
+  switch (SM) {
+  // Fermi architecture.
+  case ELF::EF_CUDA_SM20:
+    return "sm_20";
+  case ELF::EF_CUDA_SM21:
+    return "sm_21";
+
+  // Kepler architecture.
+  case ELF::EF_CUDA_SM30:
+    return "sm_30";
+  case ELF::EF_CUDA_SM32:
+    return "sm_32";
+  case ELF::EF_CUDA_SM35:
+    return "sm_35";
+  case ELF::EF_CUDA_SM37:
+    return "sm_37";
+
+  // Maxwell architecture.
+  case ELF::EF_CUDA_SM50:
+    return "sm_50";
+  case ELF::EF_CUDA_SM52:
+    return "sm_52";
+  case ELF::EF_CUDA_SM53:
+    return "sm_53";
+
+  // Pascal architecture.
+  case ELF::EF_CUDA_SM60:
+    return "sm_60";
+  case ELF::EF_CUDA_SM61:
+    return "sm_61";
+  case ELF::EF_CUDA_SM62:
+    return "sm_62";
+
+  // Volta architecture.
+  case ELF::EF_CUDA_SM70:
+    return "sm_70";
+  case ELF::EF_CUDA_SM72:
+    return "sm_72";
+
+  // Turing architecture.
+  case ELF::EF_CUDA_SM75:
+    return "sm_75";
+
+  // Ampere architecture.
+  case ELF::EF_CUDA_SM80:
+    return "sm_80";
+  case ELF::EF_CUDA_SM86:
+    return "sm_86";
+  case ELF::EF_CUDA_SM87:
+    return "sm_87";
+
+  // Ada architecture.
+  case ELF::EF_CUDA_SM89:
+    return "sm_89";
+
+  // Hopper architecture.
+  case ELF::EF_CUDA_SM90:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+  default:
+    llvm_unreachable("Unknown EF_CUDA_SM value");
+  }
+}
+
 // FIXME Encode from a tablegen description or target parser.
 void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
   if (TheTriple.getSubArch() != Triple::NoSubArch)
```