diff options
Diffstat (limited to 'llvm/test')
8 files changed, 374 insertions, 89 deletions
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll new file mode 100644 index 0000000..2a12baf --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/RWStructuredBufferNonUniformIdx.ll @@ -0,0 +1,26 @@ +; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Shader +; CHECK-DAG: OpCapability ShaderNonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate %[[#access1:]] NonUniformEXT +@ReadWriteStructuredBuf.str = private unnamed_addr constant [23 x i8] c"ReadWriteStructuredBuf\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0) + %add.i = add i32 %0, 1 + %1 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %add.i) + %2 = tail call target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_a0v4i32_12_1t(i32 0, i32 0, i32 64, i32 %1, ptr nonnull @ReadWriteStructuredBuf.str) + %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4i32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) %2, i32 98) + %4 = load <4 x i32>, ptr addrspace(11) %3, align 16 + %vecins.i = insertelement <4 x i32> %4, i32 99, i64 0 +; CHECK: %[[#access1]] = OpAccessChain {{.*}} +; CHECK: OpStore %[[#access1]] {{%[0-9]+}} Aligned 16 + store <4 x i32> %vecins.i, ptr addrspace(11) %3, align 16 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll new file mode 100644 index 0000000..92efad9 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/NonUniformIdx/StructuredBufferNonUniformIdx.ll @@ -0,0 +1,24 @@ +; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Shader +; CHECK-DAG: OpCapability ShaderNonUniformEXT +; CHECK-DAG: OpCapability StorageTexelBufferArrayNonUniformIndexingEXT +; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT +; CHECK-DAG: OpDecorate %[[#access:]] NonUniformEXT +; CHECK-DAG: OpDecorate %[[#load:]] NonUniformEXT +@ReadWriteBuf.str = private unnamed_addr constant [13 x i8] c"ReadWriteBuf\00", align 1 + +define void @main() local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0) + %1 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %0) + %2 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 64, i32 %1, ptr nonnull @ReadWriteBuf.str) + %3 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %2, i32 96) +; CHECK: {{%[0-9]+}} = OpCompositeExtract {{.*}} +; CHECK: %[[#access]] = OpAccessChain {{.*}} +; CHECK: %[[#load]] = OpLoad {{%[0-9]+}} %[[#access]] +; CHECK: OpImageWrite %[[#load]] {{%[0-9]+}} {{%[0-9]+}} + store i32 95, ptr addrspace(11) %3, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll deleted file mode 100644 index 5e15aab..0000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; This test depends on llvm.svp.resource.nonuniformindex support (not yet implemented) -; https://github.com/llvm/llvm-project/issues/160231 -; XFAIL: * - -@.str.b0 = private unnamed_addr constant [3 x i8] c"B0\00", align 1 - -; CHECK-DAG: OpCapability Shader -; CHECK-DAG: OpCapability ShaderNonUniformEXT -; CHECK-DAG: OpCapability StorageImageArrayNonUniformIndexing -; CHECK-DAG: OpCapability Image1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0{{$}} -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 0, ptr nonnull @.str.b0) - %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer0, i32 0) - store i32 0, ptr %ptr0, align 4 - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 1, ptr nonnull @.str.b0) - %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer1, i32 0) - store i32 0, ptr %ptr1, align 4 - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll index 211a7bc..e2b6167 100644 --- a/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll +++ b/llvm/test/DebugInfo/Generic/compileunit-source-language-name.ll @@ -4,6 +4,11 @@ @x = global i32 0, align 4, !dbg !0 +; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync) +define void @_Z4funcv() !dbg !8 { + ret void, !dbg !11 +} + !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!6, !7} @@ -15,3 +20,7 @@ !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !6 = !{i32 7, !"Dwarf Version", i32 5} !7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", scope: !3, file: !3, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 14, scope: !8) diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll index b946bbf..14ee00d 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p indvars -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + declare void @foo() define void @narrow_iv_precondition_order_1(ptr %start, i32 %base, i8 %n) { @@ -96,3 +98,202 @@ loop: exit: ret void } + +define i32 @urem_order1(i32 %n) { +; CHECK-LABEL: define i32 @urem_order1( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 %urem, 0 + br i1 %urem.zero, label %ph, label %exit + +ph: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i32 @urem_order2(i32 %n) { +; CHECK-LABEL: define i32 @urem_order2( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[N_NON_ZERO:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: br i1 [[N_NON_ZERO]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[N]], 3 +; CHECK-NEXT: [[UREM_ZERO:%.*]] = icmp eq i32 [[UREM]], 0 +; CHECK-NEXT: br i1 [[UREM_ZERO]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[PH]] ], [ 3, %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %n.non.zero = icmp ne i32 %n, 0 + br i1 %n.non.zero, label %ph, label %exit + +ph: + %urem = urem i32 %n, 3 + %urem.zero = icmp eq i32 %urem, 0 + br i1 %urem.zero, label %loop, label %exit + +loop: + %iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ] + call void @foo() + %iv.next = add i32 %iv, 3 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ 1, %entry ], [ 2, %ph ], [ 3, %loop ] + ret i32 %res +} + +define i64 @test_loop_with_div_order_1(i64 %n) { +; CHECK-LABEL: define i64 @test_loop_with_div_order_1( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_bounds + +check_bounds: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} + +define i64 @test_loop_with_div_order_2(i64 %n) { +; CHECK-LABEL: define i64 @test_loop_with_div_order_2( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[N_PLUS_63:%.*]] = add i64 [[N]], 63 +; CHECK-NEXT: [[UPPER_BOUND:%.*]] = lshr i64 [[N_PLUS_63]], 6 +; CHECK-NEXT: [[BOUNDS_CHECK:%.*]] = icmp ult i64 [[N_PLUS_63]], 64 +; CHECK-NEXT: br i1 [[BOUNDS_CHECK]], label %[[EXIT:.*]], label %[[CHECK_BOUNDS:.*]] +; CHECK: [[CHECK_BOUNDS]]: +; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[IS_ZERO]], label %[[EXIT]], label %[[CHECK_PARITY:.*]] +; CHECK: [[CHECK_PARITY]]: +; CHECK-NEXT: [[IS_ODD:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 +; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %n_plus_63 = add i64 %n, 63 + %upper_bound = lshr i64 %n_plus_63, 6 + %bounds_check = icmp ult i64 %n_plus_63, 64 + br i1 %bounds_check, label %exit, label %check_bounds + +check_bounds: + %is_zero = icmp eq i64 %n, 0 + br i1 %is_zero, label %exit, label %check_parity + +check_parity: + %is_odd = and i64 %n, 1 + %parity_check = icmp eq i64 %is_odd, 0 + br i1 %parity_check, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv_next, %loop ], [ 0, %check_parity ] + %dummy = load volatile i64, ptr null, align 8 + %iv_next = add i64 %iv, 1 + %exit_cond = icmp ult i64 %iv_next, %upper_bound + br i1 %exit_cond, label %loop, label %exit + +exit: + ret i64 0 +} diff --git a/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll b/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll index df1399d..a8db6a0 100644 --- a/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll +++ b/llvm/test/Transforms/LoopSimplifyCFG/pr117537.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='print<scalar-evolution>,loop-mssa(licm,loop-simplifycfg,loop-predication)' -verify-scev < %s 2>/dev/null | FileCheck %s +; RUN: opt -S -passes='print<scalar-evolution>,loop-mssa(licm,loop-simplifycfg,loop-predication)' -verify-scev < %s | FileCheck %s ; Make sure we don't assert due to insufficient SCEV invalidation. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll index 757d9e7..803ffa8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll @@ -1,42 +1,81 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 ; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s ; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s -; REMARKS: the cost-model indicates that vectorization is not beneficial +target triple = "arm64-apple-macosx" -; Test for https://github.com/llvm/llvm-project/issues/116375. -define void @test_i24_load_for(ptr noalias %src, ptr %dst) { -; CHECK-LABEL: define void @test_i24_load_for( -; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]] -; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]] -; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; REMARKS: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load +; Test case for https://github.com/llvm/llvm-project/issues/160792. +define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noalias %c) #0 { +; CHECK-LABEL: define void @replicate_sdiv_conditional( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = ashr <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i32> [[TMP9]] to <vscale x 4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP7]], <vscale x 4 x i64> splat (i64 1) +; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc <vscale x 4 x i64> [[TMP12]] to <vscale x 4 x i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store <vscale x 4 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; entry: - br label %loop + br label %loop.header -loop: - %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] - %for = phi i24 [ 0, %entry ], [ %for.next, %loop ] - %iv.next = add i16 %iv, 1 - %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv - %for.next = load i24, ptr %gep.src, align 1 - %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv - store i24 %for, ptr %gep.dst - %ec = icmp eq i16 %iv.next, 1000 - br i1 %ec, label %exit, label %loop +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + %val.c = load i32, ptr %gep.c, align 4 + %cmp = icmp slt i32 %val.c, 0 + br i1 %cmp, label %if.then, label %loop.latch -exit: +if.then: + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %val.b = load i32, ptr %gep.b, align 4 + %sext = sext i32 %val.b to i64 + %shr = ashr i32 %val.b, 1 + %add = add i32 %shr, %val.c + %conv = sext i32 %add to i64 + %div = sdiv i64 %conv, %sext + %trunc = trunc i64 %div to i32 + br label %loop.latch + +loop.latch: + %result = phi i32 [ %trunc, %if.then ], [ %val.c, %loop.header ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %result, ptr %gep.a, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exit = icmp eq i64 %iv.next, 64 + br i1 %exit, label %for.end, label %loop.header + +for.end: ret void } + +attributes #0 = { "target-cpu"="neoverse-512tvb" } diff --git a/llvm/test/Transforms/LoopVectorize/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/invalid-costs.ll new file mode 100644 index 0000000..757d9e7 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/invalid-costs.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s +; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s + +; REMARKS: the cost-model indicates that vectorization is not beneficial + +; Test for https://github.com/llvm/llvm-project/issues/116375. +define void @test_i24_load_for(ptr noalias %src, ptr %dst) { +; CHECK-LABEL: define void @test_i24_load_for( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]] +; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]] +; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %for = phi i24 [ 0, %entry ], [ %for.next, %loop ] + %iv.next = add i16 %iv, 1 + %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv + %for.next = load i24, ptr %gep.src, align 1 + %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv + store i24 %for, ptr %gep.dst + %ec = icmp eq i16 %iv.next, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} |