; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 < %s | FileCheck --check-prefixes=BASE,DEFAULT %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 -amdgpu-promote-alloca-to-vector-vgpr-ratio=2 < %s | FileCheck --check-prefixes=BASE,RATIO2 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 -amdgpu-promote-alloca-to-vector-vgpr-ratio=8 < %s | FileCheck --check-prefixes=BASE,RATIO8 %s

; Exercises amdgpu-promote-alloca (LDS promotion disabled) with the
; -amdgpu-promote-alloca-to-vector-vgpr-ratio option left unset, set to 2 and
; set to 8. Each kernel zero-fills a private i32 array, stores 42 and 43 into
; two fixed slots and loads one slot selected from the workitem ids. The
; expectations record, per invocation, whether the array stays an alloca or is
; rewritten into a vector value.
;
; The *_attrib kernels additionally carry the
; "amdgpu-promote-alloca-to-vector-vgpr-ratio" function attribute. In the
; expectations below, the invocations that pass the option on the command line
; produce the same outcome for an *_attrib kernel as for its plain counterpart.

; 24 x i32 array, no ratio attribute: kept as an alloca in the DEFAULT and
; RATIO8 expectations, rewritten to a <24 x i32> value in the RATIO2 ones.
define amdgpu_kernel void @i32_24_elements(ptr %out) #0 {
; DEFAULT-LABEL: define amdgpu_kernel void @i32_24_elements(
; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; DEFAULT-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; DEFAULT-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; DEFAULT-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; DEFAULT-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; DEFAULT-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; DEFAULT-NEXT:    [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5)
; DEFAULT-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false)
; DEFAULT-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; DEFAULT-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20
; DEFAULT-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; DEFAULT-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; DEFAULT-NEXT:    [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; DEFAULT-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; DEFAULT-NEXT:    ret void
;
; RATIO2-LABEL: define amdgpu_kernel void @i32_24_elements(
; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; RATIO2-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO2-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO2-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO2-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO2-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO2-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO2-NEXT:    [[ALLOCA:%.*]] = freeze <24 x i32> poison
; RATIO2-NEXT:    [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]]
; RATIO2-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; RATIO2-NEXT:    ret void
;
; RATIO8-LABEL: define amdgpu_kernel void @i32_24_elements(
; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; RATIO8-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO8-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO8-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO8-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO8-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO8-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO8-NEXT:    [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5)
; RATIO8-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false)
; RATIO8-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; RATIO8-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20
; RATIO8-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; RATIO8-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; RATIO8-NEXT:    [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; RATIO8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; RATIO8-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; RATIO8-NEXT:    ret void
;
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %c1 = icmp uge i32 %x, 3
  %c2 = icmp uge i32 %y, 3
  %sel1 = select i1 %c1, i32 1, i32 2
  %sel2 = select i1 %c2, i32 0, i32 %sel1
  %alloca = alloca [24 x i32], align 16, addrspace(5)
  call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 96, i1 false)
  %gep.0 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 0
  %gep.1 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 20
  store i32 42, ptr addrspace(5) %gep.0
  store i32 43, ptr addrspace(5) %gep.1
  %gep = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2
  %load = load i32, ptr addrspace(5) %gep
  store i32 %load, ptr %out
  ret void
}

; Same body as @i32_24_elements, but with the ratio attribute set to "2" (#1):
; rewritten to a vector in the DEFAULT and RATIO2 expectations, kept as an
; alloca in the RATIO8 ones.
define amdgpu_kernel void @i32_24_elements_attrib(ptr %out) #1 {
; DEFAULT-LABEL: define amdgpu_kernel void @i32_24_elements_attrib(
; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; DEFAULT-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; DEFAULT-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; DEFAULT-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; DEFAULT-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; DEFAULT-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; DEFAULT-NEXT:    [[ALLOCA:%.*]] = freeze <24 x i32> poison
; DEFAULT-NEXT:    [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]]
; DEFAULT-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; DEFAULT-NEXT:    ret void
;
; RATIO2-LABEL: define amdgpu_kernel void @i32_24_elements_attrib(
; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
; RATIO2-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO2-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO2-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO2-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO2-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO2-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO2-NEXT:    [[ALLOCA:%.*]] = freeze <24 x i32> poison
; RATIO2-NEXT:    [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]]
; RATIO2-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; RATIO2-NEXT:    ret void
;
; RATIO8-LABEL: define amdgpu_kernel void @i32_24_elements_attrib(
; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
; RATIO8-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO8-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO8-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO8-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO8-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO8-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO8-NEXT:    [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5)
; RATIO8-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false)
; RATIO8-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; RATIO8-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20
; RATIO8-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; RATIO8-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; RATIO8-NEXT:    [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; RATIO8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; RATIO8-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; RATIO8-NEXT:    ret void
;
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %c1 = icmp uge i32 %x, 3
  %c2 = icmp uge i32 %y, 3
  %sel1 = select i1 %c1, i32 1, i32 2
  %sel2 = select i1 %c2, i32 0, i32 %sel1
  %alloca = alloca [24 x i32], align 16, addrspace(5)
  call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 96, i1 false)
  %gep.0 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 0
  %gep.1 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 20
  store i32 42, ptr addrspace(5) %gep.0
  store i32 43, ptr addrspace(5) %gep.1
  %gep = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2
  %load = load i32, ptr addrspace(5) %gep
  store i32 %load, ptr %out
  ret void
}

; 16 x i32 array, no ratio attribute: rewritten to a <16 x i32> value in the
; DEFAULT and RATIO2 expectations, kept as an alloca in the RATIO8 ones.
define amdgpu_kernel void @i32_16_elements(ptr %out) #0 {
; DEFAULT-LABEL: define amdgpu_kernel void @i32_16_elements(
; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; DEFAULT-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; DEFAULT-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; DEFAULT-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; DEFAULT-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; DEFAULT-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; DEFAULT-NEXT:    [[ALLOCA:%.*]] = freeze <16 x i32> poison
; DEFAULT-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]]
; DEFAULT-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; DEFAULT-NEXT:    ret void
;
; RATIO2-LABEL: define amdgpu_kernel void @i32_16_elements(
; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] {
; RATIO2-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO2-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO2-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO2-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO2-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO2-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO2-NEXT:    [[ALLOCA:%.*]] = freeze <16 x i32> poison
; RATIO2-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]]
; RATIO2-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; RATIO2-NEXT:    ret void
;
; RATIO8-LABEL: define amdgpu_kernel void @i32_16_elements(
; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] {
; RATIO8-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO8-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO8-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO8-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO8-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO8-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO8-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5)
; RATIO8-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false)
; RATIO8-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; RATIO8-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15
; RATIO8-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; RATIO8-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; RATIO8-NEXT:    [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; RATIO8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; RATIO8-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; RATIO8-NEXT:    ret void
;
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %c1 = icmp uge i32 %x, 3
  %c2 = icmp uge i32 %y, 3
  %sel1 = select i1 %c1, i32 1, i32 2
  %sel2 = select i1 %c2, i32 0, i32 %sel1
  %alloca = alloca [16 x i32], align 16, addrspace(5)
  call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 64, i1 false)
  %gep.0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 0
  %gep.1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 15
  store i32 42, ptr addrspace(5) %gep.0
  store i32 43, ptr addrspace(5) %gep.1
  %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2
  %load = load i32, ptr addrspace(5) %gep
  store i32 %load, ptr %out
  ret void
}

; Same body as @i32_16_elements, but with the ratio attribute set to "8" (#2):
; kept as an alloca in the DEFAULT and RATIO8 expectations, rewritten to a
; vector in the RATIO2 ones.
define amdgpu_kernel void @i32_16_elements_attrib(ptr %out) #2 {
; DEFAULT-LABEL: define amdgpu_kernel void @i32_16_elements_attrib(
; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] {
; DEFAULT-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; DEFAULT-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; DEFAULT-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; DEFAULT-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; DEFAULT-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; DEFAULT-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; DEFAULT-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5)
; DEFAULT-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false)
; DEFAULT-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; DEFAULT-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15
; DEFAULT-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; DEFAULT-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; DEFAULT-NEXT:    [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; DEFAULT-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; DEFAULT-NEXT:    ret void
;
; RATIO2-LABEL: define amdgpu_kernel void @i32_16_elements_attrib(
; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] {
; RATIO2-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO2-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO2-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO2-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO2-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO2-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO2-NEXT:    [[ALLOCA:%.*]] = freeze <16 x i32> poison
; RATIO2-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]]
; RATIO2-NEXT:    store i32 [[TMP1]], ptr [[OUT]], align 4
; RATIO2-NEXT:    ret void
;
; RATIO8-LABEL: define amdgpu_kernel void @i32_16_elements_attrib(
; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] {
; RATIO8-NEXT:    [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; RATIO8-NEXT:    [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
; RATIO8-NEXT:    [[C1:%.*]] = icmp uge i32 [[X]], 3
; RATIO8-NEXT:    [[C2:%.*]] = icmp uge i32 [[Y]], 3
; RATIO8-NEXT:    [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
; RATIO8-NEXT:    [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
; RATIO8-NEXT:    [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5)
; RATIO8-NEXT:    call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false)
; RATIO8-NEXT:    [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
; RATIO8-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15
; RATIO8-NEXT:    store i32 42, ptr addrspace(5) [[GEP_0]], align 4
; RATIO8-NEXT:    store i32 43, ptr addrspace(5) [[GEP_1]], align 4
; RATIO8-NEXT:    [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]]
; RATIO8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4
; RATIO8-NEXT:    store i32 [[LOAD]], ptr [[OUT]], align 4
; RATIO8-NEXT:    ret void
;
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %c1 = icmp uge i32 %x, 3
  %c2 = icmp uge i32 %y, 3
  %sel1 = select i1 %c1, i32 1, i32 2
  %sel2 = select i1 %c2, i32 0, i32 %sel1
  %alloca = alloca [16 x i32], align 16, addrspace(5)
  call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 64, i1 false)
  %gep.0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 0
  %gep.1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 15
  store i32 42, ptr addrspace(5) %gep.0
  store i32 43, ptr addrspace(5) %gep.1
  %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2
  %load = load i32, ptr addrspace(5) %gep
  store i32 %load, ptr %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()
declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg)

; #0 carries no ratio attribute; #1 and #2 add the ratio attribute with values
; "2" and "8" respectively. All three share the same max-regs and waves-per-eu
; settings.
attributes #0 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" }
attributes #1 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" "amdgpu-promote-alloca-to-vector-vgpr-ratio"="2" }
attributes #2 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" "amdgpu-promote-alloca-to-vector-vgpr-ratio"="8" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; BASE: {{.*}}