; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck %s

; Every function in this file divides an i32 loaded through
; @llvm.amdgcn.dispatch.ptr() (values named %grid_size_*) by a zero-extended
; i16 loaded through @llvm.amdgcn.implicitarg.ptr() (values named %wg_size_*),
; or a deliberate variation of that pattern. The assertions record, per
; function, whether the pass pipeline on the run line replaces the division
; with a single i32 load from the implicitarg pointer or leaves the input
; untouched.

; Positive case, X dimension: dispatch offset 12 divided by implicitarg
; offset 12. Expected output is one i32 load directly from the implicitarg
; pointer (offset 0), tagged !invariant.load and !noundef.
define i32 @num_blocks_x() {
; CHECK-LABEL: define i32 @num_blocks_x() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0:![0-9]+]], !noundef [[META0]]
; CHECK-NEXT:    ret i32 [[TMP0]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  ret i32 %count_x
}

; Positive case, Y dimension: dispatch offset 16 divided by implicitarg
; offset 14. Expected output is one i32 load from implicitarg offset 4.
define i32 @num_blocks_y() {
; CHECK-LABEL: define i32 @num_blocks_y() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 4
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
  %grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_y = getelementptr i8, ptr addrspace(4) %implicitarg, i32 14
  %wg_size_y = load i16, ptr addrspace(4) %i_gep_y, align 2
  %conv_y = zext i16 %wg_size_y to i32
  %count_y = udiv i32 %grid_size_y, %conv_y
  ret i32 %count_y
}

; Positive case, Z dimension: dispatch offset 20 divided by implicitarg
; offset 16. Expected output is one i32 load from implicitarg offset 8.
define i32 @num_blocks_z() {
; CHECK-LABEL: define i32 @num_blocks_z() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_z = getelementptr i8, ptr addrspace(4) %dispatch, i32 20
  %grid_size_z = load i32, ptr addrspace(4) %d_gep_z, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_z = getelementptr i8, ptr addrspace(4) %implicitarg, i32 16
  %wg_size_z = load i16, ptr addrspace(4) %i_gep_z, align 2
  %conv_z = zext i16 %wg_size_z to i32
  %count_z = udiv i32 %grid_size_z, %conv_z
  ret i32 %count_z
}

; Positive case with control flow: a switch on %dim selects one of the three
; divisions above, with both intrinsic calls hoisted into the entry block and
; an unreachable default. Expected output keeps the switch, leaves only a
; pointer computation (offset 0, 4 or 8 from the implicitarg pointer) in each
; case block, and performs a single i32 load on a pointer phi in the exit
; block. No call to the dispatch pointer intrinsic is expected to remain.
define i32 @num_blocks(i32 %dim) {
; CHECK-LABEL: define i32 @num_blocks(
; CHECK-SAME: i32 [[DIM:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    switch i32 [[DIM]], label %[[DEFAULT:.*]] [
; CHECK-NEXT:      i32 0, label %[[DIM_X:.*]]
; CHECK-NEXT:      i32 1, label %[[DIM_Y:.*]]
; CHECK-NEXT:      i32 2, label %[[DIM_Z:.*]]
; CHECK-NEXT:    ]
; CHECK:       [[DIM_X]]:
; CHECK-NEXT:    br label %[[EXIT:.*]]
; CHECK:       [[DIM_Y]]:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 4
; CHECK-NEXT:    br label %[[EXIT]]
; CHECK:       [[DIM_Z]]:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
; CHECK-NEXT:    br label %[[EXIT]]
; CHECK:       [[DEFAULT]]:
; CHECK-NEXT:    unreachable
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RETVAL_IN:%.*]] = phi ptr addrspace(4) [ [[TMP1]], %[[DIM_X]] ], [ [[TMP0]], %[[DIM_Y]] ], [ [[TMP2]], %[[DIM_Z]] ]
; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = load i32, ptr addrspace(4) [[RETVAL_IN]], align 4, !invariant.load [[META0]], !noundef [[META0]]
; CHECK-NEXT:    ret i32 [[RETVAL_0_I]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  switch i32 %dim, label %default [
  i32 0, label %dim_x
  i32 1, label %dim_y
  i32 2, label %dim_z
  ]

dim_x:
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  br label %exit

dim_y:
  %d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
  %grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
  %i_gep_y = getelementptr i8, ptr addrspace(4) %implicitarg, i32 14
  %wg_size_y = load i16, ptr addrspace(4) %i_gep_y, align 2
  %conv_y = zext i16 %wg_size_y to i32
  %count_y = udiv i32 %grid_size_y, %conv_y
  br label %exit

dim_z:
  %d_gep_z = getelementptr i8, ptr addrspace(4) %dispatch, i32 20
  %grid_size_z = load i32, ptr addrspace(4) %d_gep_z, align 4
  %i_gep_z = getelementptr i8, ptr addrspace(4) %implicitarg, i32 16
  %wg_size_z = load i16, ptr addrspace(4) %i_gep_z, align 2
  %conv_z = zext i16 %wg_size_z to i32
  %count_z = udiv i32 %grid_size_z, %conv_z
  br label %exit

default:
  unreachable

exit:
  %retval = phi i32 [ %count_x, %dim_x ], [ %count_y, %dim_y ], [ %count_z, %dim_z ]
  ret i32 %retval
}

; Positive case with a wider division: both operands are zero-extended to i64
; before the udiv. Expected output is the i32 load from implicitarg offset 0
; followed by a zext to i64.
define i64 @larger() {
; CHECK-LABEL: define i64 @larger() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
; CHECK-NEXT:    [[CONV_GRID_X:%.*]] = zext i32 [[GRID_SIZE_X]] to i64
; CHECK-NEXT:    ret i64 [[CONV_GRID_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i64
  %conv_grid_x = zext i32 %grid_size_x to i64
  %count_x = udiv i64 %conv_grid_x, %conv_x
  ret i64 %count_x
}

; Negative case: the dividend comes from dispatch offset 16 (the Y offset used
; above) while the divisor comes from implicitarg offset 12 (the X offset used
; above). The mismatched pair is expected to be left as a udiv of two loads.
define i32 @bad_offset() {
; CHECK-LABEL: define i32 @bad_offset() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
; CHECK-NEXT:    [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
; CHECK-NEXT:    [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
; CHECK-NEXT:    [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
; CHECK-NEXT:    [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_Y]], [[CONV_X]]
; CHECK-NEXT:    ret i32 [[COUNT_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_y = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
  %grid_size_y = load i32, ptr addrspace(4) %d_gep_y, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  ret i32 %count_x
}

; Negative case: the zero-extended implicitarg value has no user and the
; function returns the dispatch load directly, so there is no division.
; Expected output still returns the load from dispatch offset 12; the unused
; implicitarg instructions do not appear in the expected output.
define i32 @dangling() {
; CHECK-LABEL: define i32 @dangling() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i32
  ret i32 %grid_size_x
}

; Negative case: the divisor is sign-extended (sext) instead of zero-extended.
; The udiv is expected to be left in place.
define i32 @wrong_cast() {
; CHECK-LABEL: define i32 @wrong_cast() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
; CHECK-NEXT:    [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
; CHECK-NEXT:    [[CONV_X:%.*]] = sext i16 [[WG_SIZE_X]] to i32
; CHECK-NEXT:    [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
; CHECK-NEXT:    ret i32 [[COUNT_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = sext i16 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  ret i32 %count_x
}

; Negative case: the divisor is loaded as i8 rather than i16 from implicitarg
; offset 12. The udiv is expected to be left in place.
define i32 @wrong_size() {
; CHECK-LABEL: define i32 @wrong_size() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
; CHECK-NEXT:    [[WG_SIZE_X:%.*]] = load i8, ptr addrspace(4) [[I_GEP_X]], align 2
; CHECK-NEXT:    [[CONV_X:%.*]] = zext i8 [[WG_SIZE_X]] to i32
; CHECK-NEXT:    [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
; CHECK-NEXT:    ret i32 [[COUNT_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i8, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i8 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  ret i32 %count_x
}

; Negative case: the value named %dispatch is produced by
; @llvm.amdgcn.implicitarg.ptr() rather than @llvm.amdgcn.dispatch.ptr(), so
; the dividend is loaded from implicitarg offset 16. The udiv is expected to
; be left in place.
define i32 @wrong_intrinsic() {
; CHECK-LABEL: define i32 @wrong_intrinsic() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
; CHECK-NEXT:    [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
; CHECK-NEXT:    [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
; CHECK-NEXT:    [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
; CHECK-NEXT:    ret i32 [[COUNT_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 16
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x = zext i16 %wg_size_x to i32
  %count_x = udiv i32 %grid_size_x, %conv_x
  ret i32 %count_x
}

; Negative case: the division is done in i16. The dispatch load is truncated
; to i16 and the i16 implicitarg load is used as the divisor directly, with no
; zext in between. The udiv is expected to be left in place.
define i16 @empty_use() {
; CHECK-LABEL: define i16 @empty_use() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT:    [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
; CHECK-NEXT:    [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
; CHECK-NEXT:    [[COUNT_X:%.*]] = udiv i16 [[TRUNC_X]], [[WG_SIZE_X]]
; CHECK-NEXT:    ret i16 [[COUNT_X]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %trunc_x = trunc i32 %grid_size_x to i16
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %count_x = udiv i16 %trunc_x, %wg_size_x
  ret i16 %count_x
}

; Positive case with repeated use: the same pair of loads feeds two separate
; zext + udiv chains whose results are added. Expected output is a single i32
; load from implicitarg offset 0 shifted left by one.
define i32 @multiple_use() {
; CHECK-LABEL: define i32 @multiple_use() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
; CHECK-NEXT:    [[SUM:%.*]] = shl i32 [[TMP0]], 1
; CHECK-NEXT:    ret i32 [[SUM]]
;
entry:
  %dispatch = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %d_gep_x = getelementptr i8, ptr addrspace(4) %dispatch, i32 12
  %grid_size_x = load i32, ptr addrspace(4) %d_gep_x, align 4
  %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i_gep_x = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
  %wg_size_x = load i16, ptr addrspace(4) %i_gep_x, align 2
  %conv_x_1 = zext i16 %wg_size_x to i32
  %count_x_1 = udiv i32 %grid_size_x, %conv_x_1
  %conv_x_2 = zext i16 %wg_size_x to i32
  %count_x_2 = udiv i32 %grid_size_x, %conv_x_2
  %sum = add i32 %count_x_1, %count_x_2
  ret i32 %sum
}
;.
; CHECK: [[META0]] = !{}
;.