aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoseph Huber <jhuber6@vols.utk.edu>2023-03-27 10:19:16 -0500
committerJoseph Huber <jhuber6@vols.utk.edu>2023-03-27 18:08:15 -0500
commitbed7005eb4d4850b6f9d93707213ced5c0c19de0 (patch)
tree7945d9f7202b5116483392e94b699208a442c5dc
parent4a38d33268959309dd97d9ef423327607bda4104 (diff)
downloadllvm-bed7005eb4d4850b6f9d93707213ced5c0c19de0.zip
llvm-bed7005eb4d4850b6f9d93707213ced5c0c19de0.tar.gz
llvm-bed7005eb4d4850b6f9d93707213ced5c0c19de0.tar.bz2
[NVPTX] Add __CUDA_ARCH__ macro to standalone NVPTX compilations
We can now target the NVPTX architecture directly via `--target=nvptx64-nvidia-cuda`. This currently does not define the `__CUDA_ARCH__` macro, which is used to let code select different code paths based on what the target architecture supports. This patch simply adds this support. Reviewed By: tra, jdoerfert Differential Revision: https://reviews.llvm.org/D146975
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp2
-rw-r--r--clang/test/Frontend/standalone-nvptx-macros.c5
2 files changed, 6 insertions, 1 deletions
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index aca51b2..7f4c5d8 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -168,7 +168,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
- if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+ if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
switch (GPU) {
diff --git a/clang/test/Frontend/standalone-nvptx-macros.c b/clang/test/Frontend/standalone-nvptx-macros.c
new file mode 100644
index 0000000..5cf20ec
--- /dev/null
+++ b/clang/test/Frontend/standalone-nvptx-macros.c
@@ -0,0 +1,5 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang %s -c -E -dM --target=nvptx64-nvidia-cuda -march=sm_70 -o - | \
+// RUN: FileCheck --check-prefix=CHECK-CUDA-ARCH %s
+// CHECK-CUDA-ARCH: #define __CUDA_ARCH__ 700