1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
// Clang extended-vector typedefs used as pointee types by the tests below:
// v2i (2 x int) by the *_b64 load/store tests and v4i (4 x int) by the
// *_b128 load/store tests.
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));
// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b8(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_load_async_to_lds_b8 with a global and a
// local char pointer plus the literal operands 16 and 0. The assertions above
// expect a single call to the matching llvm.amdgcn intrinsic with the pointers
// in addrspace(1) and addrspace(3) and the literals passed through as i32.
void test_amdgcn_global_load_async_to_lds_b8(global char *gaddr, local char *laddr) {
  __builtin_amdgcn_global_load_async_to_lds_b8(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b32(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_load_async_to_lds_b32 with a global and a
// local int pointer plus the literal operands 16 and 0. The assertions above
// expect a single call to the matching llvm.amdgcn intrinsic with the pointers
// in addrspace(1) and addrspace(3) and the literals passed through as i32.
void test_amdgcn_global_load_async_to_lds_b32(global int *gaddr, local int *laddr) {
  __builtin_amdgcn_global_load_async_to_lds_b32(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_load_async_to_lds_b64 with a global and a
// local v2i pointer plus the literal operands 16 and 0. The assertions above
// expect a single call to the matching llvm.amdgcn intrinsic with the pointers
// in addrspace(1) and addrspace(3) and the literals passed through as i32.
void test_amdgcn_global_load_async_to_lds_b64(global v2i *gaddr, local v2i *laddr) {
  __builtin_amdgcn_global_load_async_to_lds_b64(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b128(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_load_async_to_lds_b128 with a global and a
// local v4i pointer plus the literal operands 16 and 0. The assertions above
// expect a single call to the matching llvm.amdgcn intrinsic with the pointers
// in addrspace(1) and addrspace(3) and the literals passed through as i32.
void test_amdgcn_global_load_async_to_lds_b128(global v4i *gaddr, local v4i *laddr) {
  __builtin_amdgcn_global_load_async_to_lds_b128(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b8(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_store_async_from_lds_b8 with a global and
// a local char pointer plus the literal operands 16 and 0. The assertions
// above expect a single call to the matching llvm.amdgcn intrinsic with the
// pointers in addrspace(1) and addrspace(3) and the literals passed as i32.
void test_amdgcn_global_store_async_from_lds_b8(global char *gaddr, local char *laddr) {
  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b32(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_store_async_from_lds_b32 with a global and
// a local int pointer plus the literal operands 16 and 0. The assertions
// above expect a single call to the matching llvm.amdgcn intrinsic with the
// pointers in addrspace(1) and addrspace(3) and the literals passed as i32.
void test_amdgcn_global_store_async_from_lds_b32(global int *gaddr, local int *laddr) {
  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_store_async_from_lds_b64 with a global and
// a local v2i pointer plus the literal operands 16 and 0. The assertions
// above expect a single call to the matching llvm.amdgcn intrinsic with the
// pointers in addrspace(1) and addrspace(3) and the literals passed as i32.
void test_amdgcn_global_store_async_from_lds_b64(global v2i *gaddr, local v2i *laddr) {
  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b128(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_global_store_async_from_lds_b128 with a global
// and a local v4i pointer plus the literal operands 16 and 0. The assertions
// above expect a single call to the matching llvm.amdgcn intrinsic with the
// pointers in addrspace(1) and addrspace(3) and the literals passed as i32.
void test_amdgcn_global_store_async_from_lds_b128(global v4i *gaddr, local v4i *laddr) {
  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr, laddr, 16, 0);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_async_barrier_arrive_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR:%.*]])
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_ds_atomic_async_barrier_arrive_b64 with a local
// long pointer as its only argument. The assertions above expect a single
// call to the matching llvm.amdgcn intrinsic taking an addrspace(3) pointer
// and returning void.
void test_amdgcn_ds_atomic_async_barrier_arrive_b64(local long *addr) {
  __builtin_amdgcn_ds_atomic_async_barrier_arrive_b64(addr);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) [[ADDR:%.*]], i64 [[DATA:%.*]])
// CHECK-GFX1250-NEXT: store i64 [[TMP0]], ptr [[OUT:%.*]], align 8, !tbaa [[TBAA4:![0-9]+]]
// CHECK-GFX1250-NEXT: ret void
//
// Exercises __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64 with a local
// long pointer and a long data operand, and writes the builtin's result
// through `out`. The assertions above expect the matching llvm.amdgcn
// intrinsic call returning i64, followed by an i64 store of that value to
// `out`.
void test_amdgcn_ds_atomic_barrier_arrive_rtn_b64(local long *addr, long data, long *out) {
  // The result is stored so that the returning form of the builtin is not
  // dead and the call plus store both appear in the checked IR.
  *out = __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64(addr, data);
}
|