1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefixes=CHECK,HOST %s
// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefixes=CHECK,DEV %s
#include "Inputs/cuda.h"
// CHECK: %class.anon = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.0 = type { ptr, float, ptr, ptr }
// CHECK: %class.anon.1 = type { ptr, ptr, ptr }
// CHECK: %class.anon.2 = type { ptr, float, ptr, ptr }
// HOST: call void @_ZN8DevByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon)
// DEV: define amdgpu_kernel void @_ZN8DevByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon)
// Only the device function passes arugments by value.
namespace DevByVal {
__device__ float fun(float x, float y) {
return x;
}
float fun(const float &x, const float &y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9HostByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.0)
// DEV: define amdgpu_kernel void @_ZN9HostByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.0)
// Only the host function passes arugments by value.
namespace HostByVal {
float fun(float x, float y) {
return x;
}
__device__ float fun(const float &x, const float &y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN9BothByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.1)
// DEV: define amdgpu_kernel void @_ZN9BothByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.1)
// Both the host and device functions pass arugments by value.
namespace BothByVal {
float fun(float x, float y) {
return x;
}
__device__ float fun(float x, float y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
// HOST: call void @_ZN12NeitherByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.2)
// DEV: define amdgpu_kernel void @_ZN12NeitherByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.2)
// Neither the host nor device function passes arugments by value.
namespace NeitherByVal {
float fun(const float& x, const float& y) {
return x;
}
__device__ float fun(const float& x, const float& y) {
return x;
}
template<typename F>
void __global__ kernel(F f)
{
f(1);
}
void test(float const * fl, float const * A, float * Vf)
{
float constexpr small(1.0e-25);
auto lambda = [=] __device__ __host__ (unsigned int n) {
float const value = fun(small, fl[0]);
Vf[0] = value * A[0];
};
kernel<<<1, 1>>>(lambda);
}
}
|