; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s

; Use readfirstlane to demonstrate when InstCombine deems an input to
; be trivially uniform.
;
; How to read these tests: every function applies llvm.amdgcn.readfirstlane to
; some value. When the expected output no longer contains the call, the
; operand was treated as trivially uniform and the call was folded to its
; operand. When the call is still present in the expected output, the operand
; was not treated as trivially uniform.

; Constants are trivially uniform.
; Expected output returns the literal 7 directly.
define i32 @test_constant() {
; CHECK-LABEL: define i32 @test_constant(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret i32 7
;
  %r = call i32 @llvm.amdgcn.readfirstlane(i32 7)
  ret i32 %r
}

; The result of an AlwaysUniform intrinsic is trivially uniform.
; The inner readfirstlane on the plain argument %x is kept; only the outer
; readfirstlane, whose operand is the inner call's result, is removed.
define i32 @test_intrinsic(i32 %x) {
; CHECK-LABEL: define i32 @test_intrinsic(
; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
; CHECK-NEXT: ret i32 [[Y]]
;
  %y = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
  %r = call i32 @llvm.amdgcn.readfirstlane(i32 %y)
  ret i32 %r
}

; In compute kernels, all arguments are trivially uniform.
; The three amdgpu_kernel tests below cover i32, i1 and <32 x i1> arguments;
; in each, the expected output stores the argument itself.
define amdgpu_kernel void @test_compute_i32(ptr %out, i32 %x) {
; CHECK-LABEL: define amdgpu_kernel void @test_compute_i32(
; CHECK-SAME: ptr [[OUT:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: store i32 [[X]], ptr [[OUT]], align 4
; CHECK-NEXT: ret void
;
  %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
  store i32 %r, ptr %out
  ret void
}

; Same as above with an i1 kernel argument.
define amdgpu_kernel void @test_compute_i1(ptr %out, i1 %x) {
; CHECK-LABEL: define amdgpu_kernel void @test_compute_i1(
; CHECK-SAME: ptr [[OUT:%.*]], i1 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: store i1 [[X]], ptr [[OUT]], align 1
; CHECK-NEXT: ret void
;
  %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
  store i1 %r, ptr %out
  ret void
}

; Same as above with a <32 x i1> kernel argument.
define amdgpu_kernel void @test_compute_v32i1(ptr %out, <32 x i1> %x) {
; CHECK-LABEL: define amdgpu_kernel void @test_compute_v32i1(
; CHECK-SAME: ptr [[OUT:%.*]], <32 x i1> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: store <32 x i1> [[X]], ptr [[OUT]], align 4
; CHECK-NEXT: ret void
;
  %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
  store <32 x i1> %r, ptr %out
  ret void
}

; In graphics shaders, inreg arguments are trivially uniform.
; The three amdgpu_ps tests below cover i32, i1 and <32 x i1> inreg arguments;
; in each, the expected output returns the argument itself.
define amdgpu_ps i32 @test_graphics_i32(i32 inreg %x) {
; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32(
; CHECK-SAME: i32 inreg [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret i32 [[X]]
;
  %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
  ret i32 %r
}

; Same as above with an i1 inreg argument.
define amdgpu_ps i1 @test_graphics_i1(i1 inreg %x) {
; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1(
; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret i1 [[X]]
;
  %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
  ret i1 %r
}

; Same as above with a <32 x i1> inreg argument.
define amdgpu_ps <32 x i1> @test_graphics_v32i1(<32 x i1> inreg %x) {
; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1(
; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret <32 x i1> [[X]]
;
  %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
  ret <32 x i1> %r
}

; In graphics shaders, non-inreg arguments are not trivially uniform.
; Negative counterparts of the three tests above: the argument lacks inreg,
; and the expected output still contains the readfirstlane call.
define amdgpu_ps i32 @test_graphics_i32_negative(i32 %x) {
; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32_negative(
; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
; CHECK-NEXT: ret i32 [[R]]
;
  %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
  ret i32 %r
}

; Negative test with a non-inreg i1 argument.
define amdgpu_ps i1 @test_graphics_i1_negative(i1 %x) {
; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1_negative(
; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
; CHECK-NEXT: ret i1 [[R]]
;
  %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
  ret i1 %r
}

; Negative test with a non-inreg <32 x i1> argument.
define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(<32 x i1> %x) {
; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(
; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
; CHECK-NEXT: ret <32 x i1> [[R]]
;
  %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
  ret <32 x i1> %r
}

; Test i1 arguments in non-entry functions.
; The four tests below use the amdgpu_gfx calling convention with i1 and
; <32 x i1> arguments. With inreg the expected output returns the argument
; itself; without inreg (the *_negative tests) the readfirstlane call remains.
define amdgpu_gfx i1 @test_callable_i1(i1 inreg %x) {
; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1(
; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret i1 [[X]]
;
  %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
  ret i1 %r
}

; Same as above with a <32 x i1> inreg argument.
define amdgpu_gfx <32 x i1> @test_callable_v32i1(<32 x i1> inreg %x) {
; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1(
; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: ret <32 x i1> [[X]]
;
  %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
  ret <32 x i1> %r
}

; Negative test: non-inreg i1 argument in an amdgpu_gfx function.
define amdgpu_gfx i1 @test_callable_i1_negative(i1 %x) {
; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1_negative(
; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
; CHECK-NEXT: ret i1 [[R]]
;
  %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
  ret i1 %r
}

; Negative test: non-inreg <32 x i1> argument in an amdgpu_gfx function.
define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(<32 x i1> %x) {
; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(
; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
; CHECK-NEXT: ret <32 x i1> [[R]]
;
  %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
  ret <32 x i1> %r
}