path: root/clang/test/SemaCUDA/function-overload.cu
// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only \
// RUN:   -verify=host,hostdefer,devdefer,expected %s
// RUN: %clang_cc1 -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only \
// RUN:   -fcuda-is-device -verify=dev,devnodeferonly,hostdefer,devdefer,expected %s
// RUN: %clang_cc1 -fgpu-exclude-wrong-side-overloads -fgpu-defer-diag -DDEFER=1 \
// RUN:    -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only \
// RUN:    -verify=host,hostdefer,expected %s
// RUN: %clang_cc1 -fgpu-exclude-wrong-side-overloads -fgpu-defer-diag -DDEFER=1 \
// RUN:    -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device \
// RUN:    -verify=dev,devdeferonly,devdefer,expected %s
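//
// Summary of the -verify prefixes used below, as configured by the RUN lines:
//   host / dev      - host / device compilation (with or without -fgpu-defer-diag)
//   hostdefer       - all modes except device compilation with -fgpu-defer-diag
//   devdefer        - all modes except host compilation with -fgpu-defer-diag
//   devnodeferonly  - only device compilation without -fgpu-defer-diag
//   devdeferonly    - only device compilation with -fgpu-defer-diag
//   expected        - all four modes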

#include "Inputs/cuda.h"

// Opaque return types used to check that we pick the right overloads.
struct HostReturnTy {};
struct HostReturnTy2 {};
struct DeviceReturnTy {};
struct DeviceReturnTy2 {};
struct HostDeviceReturnTy {};
struct TemplateReturnTy {};

typedef HostReturnTy (*HostFnPtr)();
typedef DeviceReturnTy (*DeviceFnPtr)();
typedef HostDeviceReturnTy (*HostDeviceFnPtr)();
typedef void (*GlobalFnPtr)();  // __global__ functions must return void.

// CurrentReturnTy is {HostReturnTy,DeviceReturnTy} during {host,device}
// compilation.
#ifdef __CUDA_ARCH__
typedef DeviceReturnTy CurrentReturnTy;
#else
typedef HostReturnTy CurrentReturnTy;
#endif

// CurrentFnPtr is a function pointer to a {host,device} function during
// {host,device} compilation.
typedef CurrentReturnTy (*CurrentFnPtr)();

// Host and unattributed functions can't be overloaded.
__host__ void hh() {} // expected-note {{previous definition is here}}
void hh() {} // expected-error {{redefinition of 'hh'}}

// H/D overloading is OK.
__host__ HostReturnTy dh() { return HostReturnTy(); }
__device__ DeviceReturnTy dh() { return DeviceReturnTy(); }

// H/HD and D/HD are not allowed.
__host__ __device__ int hdh() { return 0; } // expected-note {{previous declaration is here}}
__host__ int hdh() { return 0; }
// expected-error@-1 {{__host__ function 'hdh' cannot overload __host__ __device__ function 'hdh'}}

__host__ int hhd() { return 0; }            // expected-note {{previous declaration is here}}
__host__ __device__ int hhd() { return 0; }
// expected-error@-1 {{__host__ __device__ function 'hhd' cannot overload __host__ function 'hhd'}}

__host__ __device__ int hdd() { return 0; } // expected-note {{previous declaration is here}}
__device__ int hdd() { return 0; }
// expected-error@-1 {{__device__ function 'hdd' cannot overload __host__ __device__ function 'hdd'}}

__device__ int dhd() { return 0; }          // expected-note {{previous declaration is here}}
__host__ __device__ int dhd() { return 0; }
// expected-error@-1 {{__host__ __device__ function 'dhd' cannot overload __device__ function 'dhd'}}

// Same tests for extern "C" functions.
extern "C" __host__ int chh() { return 0; } // expected-note {{previous definition is here}}
extern "C" int chh() { return 0; }          // expected-error {{redefinition of 'chh'}}

// H/D overloading is OK.
extern "C" __device__ DeviceReturnTy cdh() { return DeviceReturnTy(); }
extern "C" __host__ HostReturnTy cdh() { return HostReturnTy(); }

// H/HD and D/HD overloading is not allowed.
extern "C" __host__ __device__ int chhd1() { return 0; } // expected-note {{previous declaration is here}}
extern "C" __host__ int chhd1() { return 0; }
// expected-error@-1 {{__host__ function 'chhd1' cannot overload __host__ __device__ function 'chhd1'}}

extern "C" __host__ int chhd2() { return 0; } // expected-note {{previous declaration is here}}
extern "C" __host__ __device__ int chhd2() { return 0; }
// expected-error@-1 {{__host__ __device__ function 'chhd2' cannot overload __host__ function 'chhd2'}}

// Helper functions to verify calling restrictions.
__device__ DeviceReturnTy d() { return DeviceReturnTy(); }
// host-note@-1 1+ {{'d' declared here}}
// hostdefer-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}}
// expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}}

__host__ HostReturnTy h() { return HostReturnTy(); }
// dev-note@-1 1+ {{'h' declared here}}
// devdefer-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}}
// expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
// devdefer-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}}

__global__ void g() {}
// dev-note@-1 1+ {{'g' declared here}}
// devdefer-note@-2 1+ {{candidate function not viable: call to __global__ function from __device__ function}}
// expected-note@-3 0+ {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
// devdefer-note@-4 1+ {{candidate function not viable: call to __global__ function from __global__ function}}

extern "C" __device__ DeviceReturnTy cd() { return DeviceReturnTy(); }
// host-note@-1 1+ {{'cd' declared here}}
// hostdefer-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}}
// expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}}

extern "C" __host__ HostReturnTy ch() { return HostReturnTy(); }
// dev-note@-1 1+ {{'ch' declared here}}
// devdefer-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}}
// expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
// devdefer-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}}

__host__ void hostf() {
  DeviceFnPtr fp_d = d;         // host-error {{reference to __device__ function 'd' in __host__ function}}
  DeviceReturnTy ret_d = d();   // hostdefer-error {{no matching function for call to 'd'}}
  DeviceFnPtr fp_cd = cd;       // host-error {{reference to __device__ function 'cd' in __host__ function}}
  DeviceReturnTy ret_cd = cd(); // hostdefer-error {{no matching function for call to 'cd'}}

  HostFnPtr fp_h = h;
  HostReturnTy ret_h = h();
  HostFnPtr fp_ch = ch;
  HostReturnTy ret_ch = ch();

  HostFnPtr fp_dh = dh;
  HostReturnTy ret_dh = dh();
  HostFnPtr fp_cdh = cdh;
  HostReturnTy ret_cdh = cdh();

  GlobalFnPtr fp_g = g;
  g(); // expected-error {{call to global function 'g' not configured}}
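  // Launching 'g' with an explicit execution configuration (next line) is fine
  // from a __host__ function; only the unconfigured call above is an error.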
  g<<<0, 0>>>();
}

__device__ void devicef() {
  DeviceFnPtr fp_d = d;
  DeviceReturnTy ret_d = d();
  DeviceFnPtr fp_cd = cd;
  DeviceReturnTy ret_cd = cd();

  HostFnPtr fp_h = h;         // dev-error {{reference to __host__ function 'h' in __device__ function}}
  HostReturnTy ret_h = h();   // devdefer-error {{no matching function for call to 'h'}}
  HostFnPtr fp_ch = ch;       // dev-error {{reference to __host__ function 'ch' in __device__ function}}
  HostReturnTy ret_ch = ch(); // devdefer-error {{no matching function for call to 'ch'}}

  DeviceFnPtr fp_dh = dh;
  DeviceReturnTy ret_dh = dh();
  DeviceFnPtr fp_cdh = cdh;
  DeviceReturnTy ret_cdh = cdh();

  GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __device__ function}}
  g(); // devdefer-error {{no matching function for call to 'g'}}
  g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __device__ function}}
}

__global__ void globalf() {
  DeviceFnPtr fp_d = d;
  DeviceReturnTy ret_d = d();
  DeviceFnPtr fp_cd = cd;
  DeviceReturnTy ret_cd = cd();

  HostFnPtr fp_h = h;         // dev-error {{reference to __host__ function 'h' in __global__ function}}
  HostReturnTy ret_h = h();   // devdefer-error {{no matching function for call to 'h'}}
  HostFnPtr fp_ch = ch;       // dev-error {{reference to __host__ function 'ch' in __global__ function}}
  HostReturnTy ret_ch = ch(); // devdefer-error {{no matching function for call to 'ch'}}

  DeviceFnPtr fp_dh = dh;
  DeviceReturnTy ret_dh = dh();
  DeviceFnPtr fp_cdh = cdh;
  DeviceReturnTy ret_cdh = cdh();

  GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __global__ function}}
  g(); // devdefer-error {{no matching function for call to 'g'}}
  g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __global__ function}}
}

__host__ __device__ void hostdevicef() {
  DeviceFnPtr fp_d = d;
  DeviceReturnTy ret_d = d();
  DeviceFnPtr fp_cd = cd;
  DeviceReturnTy ret_cd = cd();
#if !defined(__CUDA_ARCH__)
  // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
  // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}}
  // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
  // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}}
#endif

  HostFnPtr fp_h = h;
  HostReturnTy ret_h = h();
  HostFnPtr fp_ch = ch;
  HostReturnTy ret_ch = ch();
#if defined(__CUDA_ARCH__)
  // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
  // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}}
  // devdefer-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
  // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}}
#endif

  CurrentFnPtr fp_dh = dh;
  CurrentReturnTy ret_dh = dh();
  CurrentFnPtr fp_cdh = cdh;
  CurrentReturnTy ret_cdh = cdh();

  GlobalFnPtr fp_g = g;
#if defined(__CUDA_ARCH__)
  // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
#endif

  g();
#if defined (__CUDA_ARCH__)
  // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
#else
  // expected-error@-4 {{call to global function 'g' not configured}}
#endif

  g<<<0,0>>>();
#if defined(__CUDA_ARCH__)
  // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
#endif
}

// Test for address of overloaded function resolution in the global context.
HostFnPtr fp_h = h;
HostFnPtr fp_ch = ch;
#if defined (__CUDA_ARCH__)
__device__
#endif
CurrentFnPtr fp_dh = dh;
#if defined (__CUDA_ARCH__)
__device__
#endif
CurrentFnPtr fp_cdh = cdh;
GlobalFnPtr fp_g = g;


// Test overloading of destructors
// Can't mix H and unattributed destructors
struct d_h {
  ~d_h() {} // expected-note {{previous definition is here}}
  __host__ ~d_h() {} // expected-error {{destructor cannot be redeclared}}
};

// HD is OK
struct d_hd {
  __host__ __device__ ~d_hd() {}
};

// Test overloading of member functions
struct m_h {
  void operator delete(void *ptr); // expected-note {{previous declaration is here}}
  __host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}}
};

// D/H overloading is OK
struct m_dh {
  __device__ void operator delete(void *ptr);
  __host__ void operator delete(void *ptr);
};

// HD by itself is OK
struct m_hd {
  __device__ __host__ void operator delete(void *ptr);
};

struct m_hhd {
  __host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
  __host__ __device__ void operator delete(void *ptr) {}
  // expected-error@-1 {{__host__ __device__ function 'operator delete' cannot overload __host__ function 'operator delete'}}
};

struct m_hdh {
  __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
  __host__ void operator delete(void *ptr) {}
  // expected-error@-1 {{__host__ function 'operator delete' cannot overload __host__ __device__ function 'operator delete'}}
};

struct m_dhd {
  __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
  __host__ __device__ void operator delete(void *ptr) {}
  // expected-error@-1 {{__host__ __device__ function 'operator delete' cannot overload __device__ function 'operator delete'}}
};

struct m_hdd {
  __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
  __device__ void operator delete(void *ptr) {}
  // expected-error@-1 {{__device__ function 'operator delete' cannot overload __host__ __device__ function 'operator delete'}}
};

// __global__ functions can't be overloaded based on attribute
// difference.
struct G {
  friend void friend_of_g(G &arg); // expected-note {{previous declaration is here}}
private:
  int x; // expected-note {{declared private here}}
};
__global__ void friend_of_g(G &arg) { int x = arg.x; }
// expected-error@-1 {{__global__ function 'friend_of_g' cannot overload __host__ function 'friend_of_g'}}
// expected-error@-2 {{'x' is a private member of 'G'}}
void friend_of_g(G &arg) { int x = arg.x; }

// HD functions are sometimes allowed to call H or D functions -- this
// is an artifact of the source-to-source splitting performed by nvcc
// that we need to mimic. During device mode compilation in nvcc, host
// functions aren't present at all, so they don't participate in
// overloading. But in clang, H and D functions are present in both
// compilation modes. Clang normally uses the target attribute as a
// tiebreaker between overloads with otherwise identical priority, but
// in order to match nvcc's behavior, we sometimes need to wholly
// discard overloads that would not be present during compilation
// under nvcc.

template <typename T> TemplateReturnTy template_vs_function(T arg) {
  return TemplateReturnTy();
}
__device__ DeviceReturnTy template_vs_function(float arg) {
  return DeviceReturnTy();
}

// Here we expect to call the templated function during host compilation, even
// if -fcuda-disable-target-call-checks is passed, and even though C++ overload
// rules prefer the non-templated function.
__host__ __device__ void test_host_device_calls_template(void) {
#ifdef __CUDA_ARCH__
  typedef DeviceReturnTy ExpectedReturnTy;
#else
  typedef TemplateReturnTy ExpectedReturnTy;
#endif

  ExpectedReturnTy ret1 = template_vs_function(1.0f);
  ExpectedReturnTy ret2 = template_vs_function(2.0);
}

// Calls from __host__ and __device__ functions should always call the
// overloaded function that matches their mode.
__host__ void test_host_calls_template_fn() {
  TemplateReturnTy ret1 = template_vs_function(1.0f);
  TemplateReturnTy ret2 = template_vs_function(2.0);
}

__device__ void test_device_calls_template_fn() {
  DeviceReturnTy ret1 = template_vs_function(1.0f);
  DeviceReturnTy ret2 = template_vs_function(2.0);
}

// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
// devnodeferonly-note@-1{{'template_vs_hd_function<int>' declared here}}
{
  return TemplateReturnTy();
}
__host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
  return HostDeviceReturnTy();
}

__host__ __device__ void test_host_device_calls_hd_template() {
#if __CUDA_ARCH__ && DEFER
  typedef HostDeviceReturnTy ExpectedReturnTy;
#else
  typedef TemplateReturnTy ExpectedReturnTy;
#endif
  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
  ExpectedReturnTy ret2 = template_vs_hd_function(1);
  // devnodeferonly-error@-1{{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
}

__host__ void test_host_calls_hd_template() {
  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
  TemplateReturnTy ret2 = template_vs_hd_function(1);
}

__device__ void test_device_calls_hd_template() {
  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
  // The host-only function template is not callable under strict call checks,
  // so on the device side the HD function is the only viable choice.
  HostDeviceReturnTy ret2 = template_vs_hd_function(1);
}

// Check that overloads still work the same way on both host and
// device side when the overload set contains only functions from one
// side of compilation.
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
#ifndef __CUDA_ARCH__
  // expected-note@-3 2{{'device_only_function' declared here}}
  // expected-note@-3 2{{'device_only_function' declared here}}
#endif
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
#ifdef __CUDA_ARCH__
  // expected-note@-3 2{{'host_only_function' declared here}}
  // expected-note@-3 2{{'host_only_function' declared here}}
#endif

__host__ __device__ void test_host_device_single_side_overloading() {
  DeviceReturnTy ret1 = device_only_function(1);
  DeviceReturnTy2 ret2 = device_only_function(1.0f);
#ifndef __CUDA_ARCH__
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
#endif
  HostReturnTy ret3 = host_only_function(1);
  HostReturnTy2 ret4 = host_only_function(1.0f);
#ifdef __CUDA_ARCH__
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
#endif
}

// Wrong-sided overloading should not cause a diagnostic unless the calling
// function is emitted. This inline function is not emitted.
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
  DeviceReturnTy ret1 = device_only_function(1);
  DeviceReturnTy2 ret2 = device_only_function(1.0f);
  HostReturnTy ret3 = host_only_function(1);
  HostReturnTy2 ret4 = host_only_function(1.0f);
}

// Wrong-sided overloading should cause a diagnostic if the calling function is
// emitted. This inline function is emitted because it is called by an emitted
// function.
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
  DeviceReturnTy ret1 = device_only_function(1);
  DeviceReturnTy2 ret2 = device_only_function(1.0f);
#ifndef __CUDA_ARCH__
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
#endif
  HostReturnTy ret3 = host_only_function(1);
  HostReturnTy2 ret4 = host_only_function(1.0f);
#ifdef __CUDA_ARCH__
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
#endif
}

__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
  test_host_device_wrong_side_overloading_inline_diag();
  // expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
}

// Verify that we allow overloading function templates.
template <typename T> __host__ T template_overload(const T &a) { return a; };
template <typename T> __device__ T template_overload(const T &a) { return a; };

__host__ void test_host_template_overload() {
  template_overload(1); // OK. Attribute-based overloading picks __host__ variant.
}
__device__ void test_device_template_overload() {
  template_overload(1); // OK. Attribute-based overloading picks __device__ variant.
}

// Two classes with `operator-` defined. One of them is device only.
struct C1;
struct C2;
__device__
int operator-(const C1 &x, const C1 &y);
int operator-(const C2 &x, const C2 &y);

template <typename T>
__host__ __device__ int constexpr_overload(const T &x, const T &y) {
  return x - y;
}

// Verify that function overloading doesn't wrongly prune a candidate.
int test_constexpr_overload(C2 &x, C2 &y) {
  return constexpr_overload(x, y);
}

// Verify no ambiguity for new operator.
void *a = new int;
__device__ void *b = new int;
// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}

// Verify no ambiguity for new operator.
template<typename _Tp> _Tp&& f();
template<typename _Tp, typename = decltype(new _Tp(f<_Tp>()))>
void __test();

void foo() {
  __test<int>();
}

// Test resolving an implicit host device candidate vs. a wrong-sided candidate.
// In device compilation, an implicit host device caller treats the implicit
// host device candidate and the wrong-sided candidate with equal preference.
// The resolution result should not change with or without the pragma.
namespace ImplicitHostDeviceVsWrongSided {
HostReturnTy callee(double x);
#pragma clang force_cuda_host_device begin
HostDeviceReturnTy callee(int x);
inline HostReturnTy implicit_hd_caller() {
  return callee(1.0);
}
#pragma clang force_cuda_host_device end
}

// Test resolving an implicit host device candidate vs. a same-sided candidate.
// In host compilation, an implicit host device caller treats the implicit host
// device candidate and the same-sided candidate with equal preference.
// The resolution result should not change with or without the pragma.
namespace ImplicitHostDeviceVsSameSide {
HostReturnTy callee(int x);
#pragma clang force_cuda_host_device begin
HostDeviceReturnTy callee(double x);
inline HostDeviceReturnTy implicit_hd_caller() {
  return callee(1.0);
}
#pragma clang force_cuda_host_device end
}

// Test resolving explicit host device candidate vs. wrong-sided candidate.
// When -fgpu-defer-diag is off, the wrong-sided candidate is not excluded, so
// the first callee is chosen.
// When -fgpu-defer-diag is on, the wrong-sided candidate is excluded, so the
// second callee is chosen.
namespace ExplicitHostDeviceVsWrongSided {
HostReturnTy callee(double x);
__host__ __device__ HostDeviceReturnTy callee(int x);
#if __CUDA_ARCH__ && DEFER
typedef HostDeviceReturnTy ExpectedRetTy;
#else
typedef HostReturnTy ExpectedRetTy;
#endif
inline __host__ __device__ ExpectedRetTy explicit_hd_caller() {
  return callee(1.0);
}
}

// In the implicit host device function 'caller', the second 'callee' should be
// chosen since it is a better match, even though it is an implicit host device
// function whereas the first 'callee' is a host function. A diagnostic would be
// emitted if the first 'callee' were chosen, since its deduced return type
// cannot be used before it is defined.
namespace ImplicitHostDeviceByConstExpr {
template <class a> a b;
auto callee(...);
template <class d> constexpr auto callee(d) -> decltype(0);
struct e {
  template <class ad, class... f> static auto g(ad, f...) {
    return h<e, decltype(b<f>)...>;
  }
  struct i {
    template <class, class... f> static constexpr auto caller(f... k) {
      return callee(k...);
    }
  };
  template <class, class... f> static auto h() {
    return i::caller<int, f...>;
  }
};
class l {
  l() {
    e::g([] {}, this);
  }
};
}

// Implicit HD candidate competes with a device candidate.
// a and b have implicit HD copy ctors. In the copy ctor of b, the ctor of a is
// resolved. The copy ctor of a should win over a(short); otherwise there would
// be an ambiguity due to the conversion operators.
namespace TestImplicitHDWithD {
  struct a {
    __device__ a(short);
    __device__ operator unsigned() const;
    __device__ operator int() const;
  };
  struct b {
    a d;
  };
  void f(b g) { b e = g; }
}

// Implicit HD candidate competes with a host candidate.
// a and b have implicit HD copy ctors. In the copy ctor of b, the ctor of a is
// resolved. The copy ctor of a should win over a(short); otherwise there would
// be an ambiguity due to the conversion operators.
namespace TestImplicitHDWithH {
  struct a {
    a(short);
    __device__ operator unsigned() const;
    __device__ operator int() const;
  };
  struct b {
    a d;
  };
  void f(b g) { b e = g; }
}

// Implicit HD candidate competes with an HD candidate.
// a and b have implicit HD copy ctors. In the copy ctor of b, the ctor of a is
// resolved. The copy ctor of a should win over a(short); otherwise there would
// be an ambiguity due to the conversion operators.
namespace TestImplicitHDWithHD {
  struct a {
    __host__ __device__ a(short);
    __device__ operator unsigned() const;
    __device__ operator int() const;
  };
  struct b {
    a d;
  };
  void f(b g) { b e = g; }
}

// An HD candidate competes with an H candidate.
// The HD candidate has a type mismatch whereas the H candidate matches.
// In device compilation, H wins when -fgpu-defer-diag is off and HD wins
// when -fgpu-defer-diag is on. In both cases the diagnostic should be
// deferred.
namespace TestDeferNoMatchingFuncNotEmitted {
  template <typename> struct a {};
  namespace b {
    struct c : a<int> {};
    template <typename d> void ag(d);
  } // namespace b
  template <typename ae>
  __host__ __device__ void ag(a<ae>) {
    ae e;
    ag(e);
  }
  void f() { (void)ag<b::c>; }
}

namespace TestDeferNoMatchingFuncEmitted {
  template <typename> struct a {};
  namespace b {
    struct c : a<int> {};
    template <typename d> void ag(d);
    // devnodeferonly-note@-1{{'ag<TestDeferNoMatchingFuncEmitted::b::c>' declared here}}
  } // namespace b
  template <typename ae>
  __host__ __device__ void ag(a<ae>) {
    ae e;
    ag(e);
    // devnodeferonly-error@-1{{reference to __host__ function 'ag<TestDeferNoMatchingFuncEmitted::b::c>' in __host__ __device__ function}}
    // devdeferonly-error@-2{{no matching function for call to 'ag'}}
    // devdeferonly-note@-3{{called by 'ag<TestDeferNoMatchingFuncEmitted::b::c>'}}
  }
  __host__ __device__ void f() { (void)ag<b::c>; }
  // devnodeferonly-note@-1{{called by 'f'}}
  // devdeferonly-note@-2{{called by 'f'}}
}

// Two HD candidates compete with an H candidate.
// The HD candidates have a type mismatch whereas the H candidate matches.
// In device compilation, H wins when -fgpu-defer-diag is off and the two HDs
// win when -fgpu-defer-diag is on. In both cases the diagnostic should be
// deferred.
namespace TestDeferAmbiguityNotEmitted {
  template <typename> struct a {};
  namespace b {
    struct c : a<int> {};
    template <typename d> void ag(d, int);
  } // namespace b
  template <typename ae>
  __host__ __device__ void ag(a<ae>, float) {
    ae e;
    ag(e, 1);
  }
  template <typename ae>
  __host__ __device__ void ag(a<ae>, double) {
  }
  void f() {
    b::c x;
    ag(x, 1);
  }
}

namespace TestDeferAmbiguityEmitted {
  template <typename> struct a {};
  namespace b {
    struct c : a<int> {};
    template <typename d> void ag(d, int);
    // devnodeferonly-note@-1{{'ag<TestDeferAmbiguityEmitted::b::c>' declared here}}
  } // namespace b
  template <typename ae>
  __host__ __device__ void ag(a<ae>, float) {
    // devdeferonly-note@-1{{candidate function [with ae = int]}}
    ae e;
    ag(e, 1);
  }
  template <typename ae>
  __host__ __device__ void ag(a<ae>, double) {
    // devdeferonly-note@-1{{candidate function [with ae = int]}}
  }
  __host__ __device__ void f() {
    b::c x;
    ag(x, 1);
    // devnodeferonly-error@-1{{reference to __host__ function 'ag<TestDeferAmbiguityEmitted::b::c>' in __host__ __device__ function}}
    // devdeferonly-error@-2{{call to 'ag' is ambiguous}}
  }
}

// Implicit HD functions compete with an H function and a D function.
// In host compilation, foo(0.0, 2) should resolve to X::foo<double, int>.
// In device compilation, foo(0.0, 2) should resolve to foo(double, int).
// In either case there should be no ambiguity.
namespace TestImplicitHDWithHAndD {
  namespace X {
    inline double foo(double, double) { return 0;}
    inline constexpr float foo(float, float) { return 1;}
    inline constexpr long double foo(long double, long double) { return 2;}
    template<typename _Tp, typename _Up> inline constexpr double foo(_Tp, _Up) { return 3;}
  };
  using X::foo;
  inline __device__ double foo(double, double) { return 4;}
  inline __device__ float foo(float, int) { return 5;}
  inline __device__ float foo(int, int) { return 6;}
  inline __device__ double foo(double, int) { return 7;}
  inline __device__ float foo(float, float) { return 9;}
  template<typename _Tp, typename _Up> inline __device__ double foo(_Tp, _Up) { return 10;}

  int g() {
    return [](){
    return foo(0.0, 2);
    }();
  }
}