aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll
blob: 9064292129928f0ef40906f55344d0ec9a5562fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s

; Baseline case: an i32 load straight from the pointer returned by
; llvm.amdgcn.implicitarg.ptr (offset 0), in a function tagged with
; "amdgpu-max-num-workgroups"="36,42,89" (attribute group #0).
; The expected output attaches !range !{i32 1, i32 37} to the load, i.e. the
; first element of the attribute triple plus one as the exclusive upper bound.
define i32 @use_grid_size_x_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Same offset-0 i32 load and attribute group (#0) as the baseline case, but the
; input load already carries !range !0, which is !{i32 0, i32 -1}.
; The expected output shows the load annotated with the same range node as the
; baseline case (!{i32 1, i32 37}) rather than the pre-existing, wider range.
define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0
  ret i32 %grid.size.x
}

; i32 load from byte offset 4 past the implicitarg pointer, in a function
; tagged with "amdgpu-max-num-workgroups"="36,42,89" (attribute group #0).
; The expected output attaches !range !{i32 1, i32 43} to the load, matching
; the second element of the attribute triple (42) plus one; the GEP itself is
; expected to be left unchanged.
define i32 @use_grid_size_y_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_y_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GEP_GRID_SIZE_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 4
; CHECK-NEXT:    [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Y]], align 4, !range [[RNG1:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_Y]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
  %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4
  ret i32 %grid.size.y
}

; i32 load from byte offset 8 past the implicitarg pointer, in a function
; tagged with "amdgpu-max-num-workgroups"="36,42,89" (attribute group #0).
; The expected output attaches !range !{i32 1, i32 90} to the load, matching
; the third element of the attribute triple (89) plus one; the GEP itself is
; expected to be left unchanged.
define i32 @use_grid_size_z_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_z_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GEP_GRID_SIZE_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 8
; CHECK-NEXT:    [[GRID_SIZE_Z:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Z]], align 4, !range [[RNG2:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_Z]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
  %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4
  ret i32 %grid.size.z
}

; Negative case: the offset-0 load has type <2 x i16> instead of i32, in a
; function that still carries attribute group #0.
; The expected output is identical to the input: no !range metadata is added
; to the vector-typed load.
define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type() #0 {
; CHECK-LABEL: define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load <2 x i16>, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret <2 x i16> [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load <2 x i16>, ptr addrspace(4) %implicitarg.ptr, align 4
  ret <2 x i16> %grid.size.x
}

; Boundary case: offset-0 i32 load in a function tagged with
; "amdgpu-max-num-workgroups"="4294967294,42,89" (attribute group #1), i.e.
; the first element is one below the largest unsigned 32-bit value.
; The expected output attaches !range !{i32 1, i32 -1} to the load; the
; exclusive upper bound 4294967295 is printed as i32 -1.
define i32 @use_grid_size_x_max_num_workgroups_max_minus_1() #1 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max_minus_1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG3:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Boundary case: offset-0 i32 load in a function tagged with
; "amdgpu-max-num-workgroups"="4294967295,42,89" (attribute group #2), i.e.
; the first element is the largest unsigned 32-bit value.
; The expected output leaves the load without any !range metadata.
; NOTE(review): presumably skipped because the exclusive upper bound (max + 1)
; is not representable in i32 - confirm against the pass implementation.
define i32 @use_grid_size_x_max_num_workgroups_max() #2 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Degenerate case: offset-0 i32 load in a function tagged with
; "amdgpu-max-num-workgroups"="0,42,89" (attribute group #3), i.e. the first
; element of the triple is zero.
; The expected output leaves the load without any !range metadata.
; NOTE(review): presumably a zero entry is treated as "no limit known" for
; that dimension - confirm against the attribute's documentation.
define i32 @use_grid_size_x_max_num_workgroups_zero() #3 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_zero(
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Intrinsic whose returned pointer is the base address for every load in this
; file.
; NOTE(review): the declaration references attribute group #3, which is the
; "amdgpu-max-num-workgroups"="0,42,89" group defined below; the expected
; globals list a separate group of intrinsic attributes (nocallback, nofree,
; ...). Confirm whether the #3 reference here is intentional.
declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3

; Attribute groups selecting the "amdgpu-max-num-workgroups" triple per case:
;   #0 - ordinary limits (36, 42, 89)
;   #1 - first element 4294967294 (largest unsigned 32-bit value minus one)
;   #2 - first element 4294967295 (largest unsigned 32-bit value)
;   #3 - first element 0
attributes #0 = { "amdgpu-max-num-workgroups"="36,42,89" }
attributes #1 = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
attributes #2 = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
attributes #3 = { "amdgpu-max-num-workgroups"="0,42,89" }

; Pre-existing range metadata attached to the load in
; @use_grid_size_x_max_num_workgroups_existing_nonzero_range.
!0 = !{i32 0, i32 -1}

;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-max-num-workgroups"="36,42,89" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="0,42,89" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[RNG0]] = !{i32 1, i32 37}
; CHECK: [[RNG1]] = !{i32 1, i32 43}
; CHECK: [[RNG2]] = !{i32 1, i32 90}
; CHECK: [[RNG3]] = !{i32 1, i32 -1}
;.