aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
blob: 459615139d7452266af49d09c7e73628595e847a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --check-globals
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s

; LDS (addrspace(3)) variable referenced by kernels @k0, @k1 and @k4. Each of
; them reaches it through the same constant expression,
; `addrspacecast (ptr addrspace(3) @lds.1 to ptr)`.
@lds.1 = internal unnamed_addr addrspace(3) global [2 x i8] poison, align 1

; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { [2 x i8] }
; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [2 x i8] }
; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i32 }
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i8] }
; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i8] }
; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [505 x i32] }
; CHECK: %llvm.amdgcn.kernel.k6.lds.t = type { [4 x i32] }

; Use constant from different kernels
;.
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 2, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 2, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 2, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t poison, align 16, !absolute_symbol !0
;.
; Kernel k0: stores i8 1 at byte offset %x from the generic-pointer view of
; @lds.1. The base pointer of the GEP is a constant expression; the FileCheck
; assertions expect it to be replaced by an addrspacecast instruction (%1)
; applied to this kernel's own @llvm.amdgcn.kernel.k0.lds.
define amdgpu_kernel void @k0(i64 %x) {
; CHECK-LABEL: @k0(
; CHECK-NEXT:    %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds to ptr
; CHECK-NEXT:    %ptr = getelementptr inbounds i8, ptr %1, i64 %x
; CHECK-NEXT:    store i8 1, ptr %ptr, align 1
; CHECK-NEXT:    ret void
;
  ; Input: the addrspacecast here is a constant-expression operand, not an instruction.
  %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
  ; `ptr addrspace(0)` is printed back as plain `ptr` in the expected output.
  store i8 1, ptr addrspace(0) %ptr, align 1
  ret void
}

; Kernel k1: same input body as @k0, so the same constant expression on @lds.1
; is used from a second kernel. The FileCheck assertions expect the rewritten
; code to refer to @llvm.amdgcn.kernel.k1.lds rather than to k0's variable.
define amdgpu_kernel void @k1(i64 %x) {
; CHECK-LABEL: @k1(
; CHECK-NEXT:    %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds to ptr
; CHECK-NEXT:    %ptr = getelementptr inbounds i8, ptr %1, i64 %x
; CHECK-NEXT:    store i8 1, ptr %ptr, align 1
; CHECK-NEXT:    ret void
;
  ; Input: the addrspacecast here is a constant-expression operand, not an instruction.
  %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
  store i8 1, ptr addrspace(0) %ptr, align 1
  ret void
}

@lds.2 = internal unnamed_addr addrspace(3) global i32 poison, align 4

; Use the same LDS variable (@lds.2) twice from one kernel, both times directly
; as a store address (no wrapping constant expression). The FileCheck
; assertions expect both stores to be retargeted to @llvm.amdgcn.kernel.k2.lds
; with no additional instructions. The %x argument is not used.
define amdgpu_kernel void @k2(i64 %x) {
; CHECK-LABEL: @k2(
; CHECK-NEXT:    store i8 1, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 4
; CHECK-NEXT:    store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.k2.lds, align 4
; CHECK-NEXT:    ret void
;
  store i8 1, ptr addrspace(3) @lds.2, align 4
  store i8 2, ptr addrspace(3) @lds.2, align 4
  ret void
}

@lds.3 = internal unnamed_addr addrspace(3) global [32 x i8] poison, align 1

; Use the same LDS variable (@lds.3) twice from one kernel, each time through a
; different constant expression: a GEP to element 16 and a GEP to element 24.
; The FileCheck assertions expect each constant GEP to become its own GEP
; instruction (%1, %2) on @llvm.amdgcn.kernel.k3.lds, and the store alignments
; to change from the input's `align 1` to 16 and 8 respectively. The %x
; argument is not used.
define amdgpu_kernel void @k3(i64 %x) {
; CHECK-LABEL: @k3(
; CHECK-NEXT:    %1 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 16
; CHECK-NEXT:    %ptr1 = addrspacecast ptr addrspace(3) %1 to ptr
; CHECK-NEXT:    store i64 1, ptr %ptr1, align 16
; CHECK-NEXT:    %2 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 24
; CHECK-NEXT:    %ptr2 = addrspacecast ptr addrspace(3) %2 to ptr
; CHECK-NEXT:    store i64 2, ptr %ptr2, align 8
; CHECK-NEXT:    ret void
;
  ; Here the addrspacecast is already an instruction; only its GEP operand is a
  ; constant expression.
  %ptr1 = addrspacecast ptr addrspace(3) getelementptr inbounds ([32 x i8], ptr addrspace(3) @lds.3, i32 0, i32 16) to ptr
  store i64 1, ptr %ptr1, align 1
  %ptr2 = addrspacecast ptr addrspace(3) getelementptr inbounds ([32 x i8], ptr addrspace(3) @lds.3, i32 0, i32 24) to ptr
  store i64 2, ptr %ptr2, align 1
  ret void
}

; @lds.1 is used from constant expressions in different kernels: @k4 repeats
; the input body of @k0 and @k1. The FileCheck assertions expect this kernel to
; get its own @llvm.amdgcn.kernel.k4.lds.
define amdgpu_kernel void @k4(i64 %x) {
; CHECK-LABEL: @k4(
; CHECK-NEXT:    %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k4.lds to ptr
; CHECK-NEXT:    %ptr = getelementptr inbounds i8, ptr %1, i64 %x
; CHECK-NEXT:    store i8 1, ptr %ptr, align 1
; CHECK-NEXT:    ret void
;
  ; Input: the addrspacecast here is a constant-expression operand, not an instruction.
  %ptr = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(3) @lds.1 to ptr), i64 %x
  store i8 1, ptr addrspace(0) %ptr, align 1
  ret void
}

@lds.4 = internal unnamed_addr addrspace(3) global [505 x i32] poison, align 4

; Multiple uses of the same constant expression within a single instruction:
; both arguments of the call are `addrspacecast (@lds.4 to ptr)`. The FileCheck
; assertions expect a single addrspacecast instruction (%1) that feeds both
; arguments.
define amdgpu_kernel void @k5() {
; CHECK-LABEL: @k5(
; CHECK-NEXT:    %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
; CHECK-NEXT:    call void poison(ptr %1, ptr %1)
; CHECK-NEXT:    ret void
;
  ; The callee is `poison`; only the argument rewriting is being checked.
  call void poison(ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr), ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr))
  ret void
}

@lds.5 = internal addrspace(3) global [4 x i32] poison, align 4

; Both the *value* and the *pointer* operands of the store instruction are
; constant expressions, and both of them refer to the same LDS variable,
; @lds.5. Hence both constant-expression operands of the store should be
; replaced by equivalent instruction sequences. The FileCheck assertions expect
; two separate GEP instructions (%1 for the value path, %3 for the pointer
; path) and an `align 8` on the store, which carries no explicit alignment in
; the input.
define amdgpu_kernel void @k6() {
; CHECK-LABEL: @k6(
; CHECK-NEXT:    %1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
; CHECK-NEXT:    %2 = ptrtoint ptr addrspace(3) %1 to i32
; CHECK-NEXT:    %3 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
; CHECK-NEXT:    store i32 %2, ptr addrspace(3) %3, align 8
; CHECK-NEXT:    ret void
;

  ; Stores the address of element 2 of @lds.5 (as an i32) into element 2 of @lds.5.
  store i32 ptrtoint (ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2) to i32), ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2)
  ret void
}
;.
; CHECK: attributes #0 = { "amdgpu-lds-size"="2" }
; CHECK: attributes #1 = { "amdgpu-lds-size"="4" }
; CHECK: attributes #2 = { "amdgpu-lds-size"="32" }
; CHECK: attributes #3 = { "amdgpu-lds-size"="2020" }
; CHECK: attributes #4 = { "amdgpu-lds-size"="16" }
;.
; CHECK: !0 = !{i32 0, i32 1}
;.