aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMészáros Gergely <gergely@streamhpc.com>2024-02-05 18:53:13 +0100
committerGitHub <noreply@github.com>2024-02-05 23:23:13 +0530
commit5942868a215ce4dbd927a7f0b06432e1eeaed698 (patch)
treec610f18f042bf78f5dfa0840e521af741efc7246
parentee06678a7500d5d8f6aa8d2442389cdb90417c38 (diff)
downloadllvm-5942868a215ce4dbd927a7f0b06432e1eeaed698.zip
llvm-5942868a215ce4dbd927a7f0b06432e1eeaed698.tar.gz
llvm-5942868a215ce4dbd927a7f0b06432e1eeaed698.tar.bz2
[clang][AMDGPU][CUDA] Handle __builtin_printf for device printf (#68515)
Previously `__builtin_printf` would result to emitting call to `printf`, even though directly calling `printf` was translated. Ref: #68478
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp1
-rw-r--r--clang/lib/CodeGen/CGGPUBuiltin.cpp3
-rw-r--r--clang/test/CodeGenCUDA/printf-builtin.cu21
-rw-r--r--clang/test/CodeGenHIP/printf-builtin.hip23
4 files changed, 47 insertions, 1 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f17e4a8..e051cbc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5710,6 +5710,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *HalfVal = Builder.CreateLoad(Address);
return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
}
+ case Builtin::BI__builtin_printf:
case Builtin::BIprintf:
if (getTarget().getTriple().isNVPTX() ||
getTarget().getTriple().isAMDGCN()) {
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index e465789..bd95541 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -136,7 +136,8 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
llvm::Function *Decl, bool WithSizeArg) {
CodeGenModule &CGM = CGF->CGM;
CGBuilderTy &Builder = CGF->Builder;
- assert(E->getBuiltinCallee() == Builtin::BIprintf);
+ assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+ E->getBuiltinCallee() == Builtin::BI__builtin_printf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
// Uses the same format as nvptx for the argument packing, but also passes
diff --git a/clang/test/CodeGenCUDA/printf-builtin.cu b/clang/test/CodeGenCUDA/printf-builtin.cu
new file mode 100644
index 0000000..e018d53
--- /dev/null
+++ b/clang/test/CodeGenCUDA/printf-builtin.cu
@@ -0,0 +1,21 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -disable-llvm-optzns -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+// CHECK-LABEL: @_Z4foo1v()
+__device__ int foo1() {
+ // CHECK: call i32 @vprintf
+ // CHECK-NOT: call i32 (ptr, ...) @printf
+ return __builtin_printf("Hello World\n");
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+__device__ int foo2() {
+ // CHECK: call i32 (ptr, ...) @printf
+ return printf("Hello World\n");
+}
diff --git a/clang/test/CodeGenHIP/printf-builtin.hip b/clang/test/CodeGenHIP/printf-builtin.hip
new file mode 100644
index 0000000..df1fbbb6
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf-builtin.hip
@@ -0,0 +1,23 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck --check-prefixes=CHECK,HOSTCALL %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=buffered -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck --check-prefixes=CHECK,BUFFERED %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+// CHECK-LABEL: @_Z4foo1v()
+__device__ int foo1() {
+ // HOSTCALL: call i64 @__ockl_printf_begin
+ // BUFFERED: call ptr addrspace(1) @__printf_alloc
+ // CHECK-NOT: call i32 (ptr, ...) @printf
+ return __builtin_printf("Hello World\n");
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+__device__ int foo2() {
+ // CHECK: call i32 (ptr, ...) @printf
+ return printf("Hello World\n");
+}