; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s

; Aggregate copied by the tests below: 256 elements of <2 x i64>.
%struct.type = type { [256 x <2 x i64>] }
; External addrspace(3) global; the tests load a generic pointer from byte
; offset 32 inside it.
@g1 = external hidden addrspace(3) global %struct.type, align 16

; This test requires the PtrReplacer to replace users in an RPO traversal.
; Furthermore, %ptr.else need not be replaced, so it must be retained in
; %ptr.sink.
;
; Input: the byref addrspace(4) argument is copied into an addrspace(5) alloca,
; and a generic pointer derived from that alloca meets an unrelated loaded
; pointer in a phi. The assertions expect the alloca and the memcpy to be gone,
; the addrspacecast in if.then to take the addrspace(4) argument directly, and
; the phi to keep the loaded pointer for the if.else edge.
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT: br label %[[SINK:.*]]
; CHECK: [[IF_ELSE]]:
; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT: br label %[[SINK]]
; CHECK: [[SINK]]:
; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr [ [[PTR_ELSE]], %[[IF_ELSE]] ], [ [[VAL_THEN]], %[[IF_THEN]] ]
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_SINK]], align 16
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
;
entry:
  ; Local addrspace(5) copy of the byref argument. The memcpy length of 4096
  ; bytes is 256 x 16, i.e. the whole array inside %struct.type.
  %coerce = alloca %struct.type, align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
  br i1 %cmp.0, label %if.then, label %if.else

if.then: ; preds = %entry
  ; Zero-offset GEP on the alloca followed by a cast to a generic pointer:
  ; this is the phi operand that originates from the copied memory.
  %ptr.then = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
  %val.then = addrspacecast ptr addrspace(5) %ptr.then to ptr
  br label %sink

if.else: ; preds = %entry
  ; Pointer loaded from @g1; it does not derive from %coerce, so it is the
  ; operand that has to survive unchanged in the phi.
  %ptr.else = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
  %val.else = getelementptr inbounds nuw i8, ptr %ptr.else, i64 0
  br label %sink

sink:
  ; Merge of an alloca-derived pointer and an unrelated pointer, then one
  ; vector load through the merged value.
  %ptr.sink = phi ptr [ %val.else, %if.else ], [ %val.then, %if.then ]
  %val.sink = load <2 x i64>, ptr %ptr.sink, align 16
  ret <2 x i64> %val.sink
}

; Loop variant of @func: the phi sits in a loop header, so one incoming value
; (%val.1, on the back edge) is defined after the phi itself. The assertions
; expect the entry block to hold only the addrspacecast of the addrspace(4)
; argument, and the phi to take the loaded pointer on the back edge.
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
; CHECK: [[SINK]]:
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
;
entry:
  ; Same copy-into-alloca prologue as @func, but the generic pointer is formed
  ; before the loop is entered.
  %coerce = alloca %struct.type, align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
  %ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
  %val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
  br label %loop

loop:
  ; The first iteration sees the alloca-derived pointer; later iterations see
  ; the pointer loaded from @g1 through the zero-offset GEP %val.1.
  %ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
  %ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
  %val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
  br i1 %cmp.0, label %loop, label %sink

sink:
  ; The only load through the phi happens after the loop exits.
  %val.sink = load <2 x i64>, ptr %ptr.phi, align 16
  ret <2 x i64> %val.sink
}

; Crashed in IC PtrReplacer because an invalid select was generated with addrspace(4) and addrspace(5)
; operands.
; Reproducer: a select chooses between two addrspace(5) allocas, only one of
; which (%alloca1) is the destination of a memcpy from the addrspace(4) byref
; argument. Both memcpys have length 0 and the select condition is the constant
; false; the assertions expect the whole body to reduce to a bare `ret void`.
define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: ret void
;
bb:
  ; %alloca has an element count of 0 and is never written; %alloca1 receives
  ; the (zero-length) copy of %arg.
  %alloca = alloca i32, i32 0, align 8, addrspace(5)
  %alloca1 = alloca [12 x i8], align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
  ; The memcpy destination is the true operand here, so the constant-false
  ; condition selects the other alloca.
  %select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca
  call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
  ret void
}

; Same as above but with swapped operands on the select.
define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped(
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: ret void
;
bb:
  %alloca = alloca i32, i32 0, align 8, addrspace(5)
  %alloca1 = alloca [12 x i8], align 16, addrspace(5)
  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
  ; The memcpy destination is the false operand here, so the constant-false
  ; condition selects it.
  %select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1
  call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
  ret void
}

; Declaration of the addrspace(4)-to-addrspace(5) memcpy called by every
; function in this file.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this portion of the file, and @llvm.memcpy.p0.p5.i64 has no
; explicit declaration in view either - confirm against the full file.
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0