aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll
blob: 90877be255e0f44a70d293b8afb68237cd92af2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s

; Aggregate copied by @func and @func_phi_loop: 256 elements of <2 x i64>
; (16 bytes each), i.e. 4096 bytes, which matches the memcpy length used there.
%struct.type = type { [256 x <2 x i64>] }
; Externally defined global in addrspace(3). The tests load a pointer from byte
; offset 32 of it to obtain a value that is unrelated to the alloca under test.
@g1 = external hidden addrspace(3) global %struct.type, align 16

; This test requires the PtrReplacer to replace users in an RPO traversal.
; Furthermore, %ptr.else need not be replaced, so it must be retained in
; %ptr.sink.
;
; Input shape: %coerce is an addrspace(5) alloca whose only write is a
; 4096-byte memcpy from the byref argument %0 (addrspace(4)). One arm of the
; diamond reaches the final load through %coerce, the other through a pointer
; loaded from @g1.
; Expected result (per the assertions below): the alloca and the memcpy are
; gone, %val.then is an addrspacecast of %0 itself, and the phi takes the
; loaded pointer directly because the zero-offset GEP %val.else is folded away.
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; CHECK:       [[IF_THEN]]:
; CHECK-NEXT:    [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT:    br label %[[SINK:.*]]
; CHECK:       [[IF_ELSE]]:
; CHECK-NEXT:    [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT:    br label %[[SINK]]
; CHECK:       [[SINK]]:
; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr [ [[PTR_ELSE]], %[[IF_ELSE]] ], [ [[VAL_THEN]], %[[IF_THEN]] ]
; CHECK-NEXT:    [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_SINK]], align 16
; CHECK-NEXT:    ret <2 x i64> [[VAL_SINK]]
;
entry:
  ; Local copy of the byref argument; the memcpy below is the only write to it.
  %coerce = alloca %struct.type, align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
  br i1 %cmp.0, label %if.then, label %if.else

if.then:                                    ; preds = %entry
  ; Zero-offset GEP into the copy, then a cast from addrspace(5) to a plain
  ; `ptr` (addrspace 0). This is the chain that gets rewritten to use %0.
  %ptr.then = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
  %val.then = addrspacecast ptr addrspace(5) %ptr.then to ptr
  br label %sink

if.else:                                      ; preds = %entry
  ; This pointer is read from memory at @g1 + 32 bytes and does not derive
  ; from %coerce.
  %ptr.else = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
  %val.else = getelementptr inbounds nuw i8, ptr %ptr.else, i64 0
  br label %sink

sink:
  ; Merge point: one incoming value derives from the alloca, the other does not.
  %ptr.sink = phi ptr [ %val.else, %if.else ], [ %val.then, %if.then ]
  %val.sink = load <2 x i64>, ptr %ptr.sink, align 16
  ret <2 x i64> %val.sink
}

; Loop variant of @func: the phi that merges the alloca-derived pointer with an
; unrelated loaded pointer sits in a loop header, and its back-edge operand
; (%val.1) is defined after the phi in the same block.
; Expected result (per the assertions below): %val.0 becomes an addrspacecast
; of %0, the alloca and the memcpy disappear, and the phi's back-edge operand
; is the loaded pointer %ptr.1 directly.
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
; CHECK-NEXT:    [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT:    br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
; CHECK:       [[SINK]]:
; CHECK-NEXT:    [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
; CHECK-NEXT:    ret <2 x i64> [[VAL_SINK]]
;
entry:
  ; Local copy of the byref argument; the memcpy below is the only write to it.
  %coerce = alloca %struct.type, align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
  ; Zero-offset GEP plus cast from addrspace(5) to a plain `ptr`; this is the
  ; value entering the loop from %entry.
  %ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
  %val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
  br label %loop

loop:
  ; The back-edge operand %val.1 is defined further down in this same block.
  %ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
  ; Pointer read from memory at @g1 + 32 bytes; it does not derive from %coerce.
  %ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
  %val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
  br i1 %cmp.0, label %loop, label %sink

sink:
  ; The load uses the phi, so it may see either the argument-derived pointer
  ; or the pointer loaded in the loop.
  %val.sink = load <2 x i64>, ptr %ptr.phi, align 16
  ret <2 x i64> %val.sink
}

; Crashed in the InstCombine PtrReplacer because an invalid select was
; generated with addrspace(4) and addrspace(5) operands.
;
; %alloca1 is written only by a zero-length memcpy from the addrspace(4)
; argument, while the other select operand, %alloca, is a separate addrspace(5)
; alloca. With the constant-false condition the select picks %alloca.
; The expected output is a function body consisting solely of `ret void`.
define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    ret void
;
bb:
  ; Array alloca with an element count of 0.
  %alloca = alloca i32, i32 0, align 8, addrspace(5)
  ; Destination of the memcpy from the addrspace(4) argument.
  %alloca1 = alloca [12 x i8], align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
  ; True operand is the memcpy destination, false operand is the other alloca.
  %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca
  call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
  ret void
}

; Same as @select_addr4_addr5 but with swapped operands on the select: %alloca
; is the true operand and %alloca1 (the destination of the memcpy from the
; addrspace(4) argument) is the false operand, so the constant-false condition
; picks %alloca1. The expected output is again just `ret void`.
define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    ret void
;
bb:
  ; Array alloca with an element count of 0.
  %alloca = alloca i32, i32 0, align 8, addrspace(5)
  ; Destination of the memcpy from the addrspace(4) argument.
  %alloca1 = alloca [12 x i8], align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
  ; False operand is the memcpy destination, true operand is the other alloca.
  %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1
  call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
  ret void
}

; Externally defined constant in addrspace(1); it is the source operand of the
; memcpy in @issue160302.
@global = external addrspace(1) constant [16 x float], align 64

; Presumably a reproducer for LLVM issue #160302, judging by the function name
; (TODO confirm against the issue tracker).
;
; The alloca is the destination of a zero-length memcpy from the constant
; @global (addrspace(1)) and feeds a select whose other operand derives from
; the argument %arg; the select condition is not a constant here.
; Expected result (per the assertions below): the memcpy and both zero-index
; GEPs are removed, the alloca itself stays, and the select keeps operating on
; addrspace(5) pointers (the alloca and %arg).
define float @issue160302(i1 %cond, ptr addrspace(5) %arg) {
; CHECK-LABEL: define float @issue160302(
; CHECK-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[ARG:%.*]]) {
; CHECK-NEXT:    [[AGG_TMP2_I4:%.*]] = alloca [16 x float], align 64, addrspace(5)
; CHECK-NEXT:    [[SELECT_PTR:%.*]] = select i1 [[COND]], ptr addrspace(5) [[AGG_TMP2_I4]], ptr addrspace(5) [[ARG]]
; CHECK-NEXT:    [[COND_I:%.*]] = load float, ptr addrspace(5) [[SELECT_PTR]], align 4
; CHECK-NEXT:    ret float [[COND_I]]
;
  %agg.tmp2.i4 = alloca [16 x float], align 64, addrspace(5)
  ; Zero-length copy from the addrspace(1) constant into the alloca.
  call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) %agg.tmp2.i4, ptr addrspace(1) @global, i64 0, i1 false)
  ; Zero-index GEPs: each points at element 0 of its base pointer.
  %m_Data.i14.i = getelementptr [16 x float], ptr addrspace(5) %agg.tmp2.i4, i32 0, i32 0
  %gep = getelementptr [16 x float], ptr addrspace(5) %arg, i32 0, i32 0
  ; One operand derives from the alloca, the other from the incoming argument.
  %select.ptr = select i1 %cond, ptr addrspace(5) %m_Data.i14.i, ptr addrspace(5) %gep
  %cond.i = load float, ptr addrspace(5) %select.ptr, align 4
  ret float %cond.i
}

; memcpy overload copying from addrspace(4) into addrspace(5), used by @func,
; @func_phi_loop and both @select_addr4_addr5 tests.
; Attribute group #0 is not defined in the portion of the file visible here;
; presumably it follows this declaration (TODO confirm).
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0