diff options
Diffstat (limited to 'llvm/test/Transforms')
7 files changed, 1438 insertions, 60 deletions
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 7b0b871..cc87d65 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -10,8 +10,8 @@ declare void @llvm.assume(i1) #1 ; Check that the assume has not been removed: -define i32 @foo1(ptr %a) #0 { -; DEFAULT-LABEL: @foo1( +define i32 @align_to_bundle(ptr %a) #0 { +; DEFAULT-LABEL: @align_to_bundle( ; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4 ; DEFAULT-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64 ; DEFAULT-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 @@ -19,7 +19,7 @@ define i32 @foo1(ptr %a) #0 { ; DEFAULT-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) ; DEFAULT-NEXT: ret i32 [[T0]] ; -; BUNDLES-LABEL: @foo1( +; BUNDLES-LABEL: @align_to_bundle( ; BUNDLES-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4 ; BUNDLES-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 32) ] ; BUNDLES-NEXT: ret i32 [[T0]] @@ -32,6 +32,28 @@ define i32 @foo1(ptr %a) #0 { ret i32 %t0 } +define i32 @align_to_bundle_ptrtoaddr(ptr %a) #0 { +; DEFAULT-LABEL: @align_to_bundle_ptrtoaddr( +; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4 +; DEFAULT-NEXT: [[PTRINT:%.*]] = ptrtoaddr ptr [[A]] to i64 +; DEFAULT-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; DEFAULT-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; DEFAULT-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; DEFAULT-NEXT: ret i32 [[T0]] +; +; BUNDLES-LABEL: @align_to_bundle_ptrtoaddr( +; BUNDLES-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4 +; BUNDLES-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 32) ] +; BUNDLES-NEXT: ret i32 [[T0]] +; + %t0 = load i32, ptr %a, align 4 + %ptrint = ptrtoaddr ptr %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + ret i32 %t0 +} + define i32 @align_assume_trunc_cond(ptr %a) 
#0 { ; DEFAULT-LABEL: @align_assume_trunc_cond( ; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4 diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index a7434a2..adf3aa1 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -237,3 +237,75 @@ define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace( call void @use.i32(i32 %addr) ret ptr addrspace(1) %gep } + +define i64 @ptrtoaddr_of_ptrmask(ptr %p, i64 %mask) { +; CHECK-LABEL: define i64 @ptrtoaddr_of_ptrmask( +; CHECK-SAME: ptr [[P:%.*]], i64 [[MASK:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[ADDR:%.*]] = and i64 [[MASK]], [[TMP1]] +; CHECK-NEXT: ret i64 [[ADDR]] +; + %masked = call ptr @llvm.ptrmask(ptr %p, i64 %mask) + %addr = ptrtoaddr ptr %masked to i64 + ret i64 %addr +} + +define i32 @ptrtoaddr_of_ptrmask_addrsize(ptr addrspace(1) %p, i32 %mask) { +; CHECK-LABEL: define i32 @ptrtoaddr_of_ptrmask_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[MASK:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32 +; CHECK-NEXT: [[ADDR:%.*]] = and i32 [[MASK]], [[TMP1]] +; CHECK-NEXT: ret i32 [[ADDR]] +; + %masked = call ptr addrspace(1) @llvm.ptrmask(ptr addrspace(1) %p, i32 %mask) + %addr = ptrtoaddr ptr addrspace(1) %masked to i32 + ret i32 %addr +} + +define i64 @ptrtoaddr_of_gep_of_inttoptr(i64 %int, i64 %offset) { +; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_inttoptr( +; CHECK-SAME: i64 [[INT:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[ADDR:%.*]] = add i64 [[INT]], [[OFFSET]] +; CHECK-NEXT: ret i64 [[ADDR]] +; + %ptr = inttoptr i64 %int to ptr + %gep = getelementptr i8, ptr %ptr, i64 %offset + %addr = ptrtoaddr ptr %gep to i64 + ret i64 %addr +} + +; FIXME: This could be supported by truncating %int before performing the +; arithmetic. 
+define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize(i64 %int, i32 %offset) { +; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize( +; CHECK-SAME: i64 [[INT:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[PTR:%.*]] = inttoptr i64 [[INT]] to ptr addrspace(1) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PTR]], i32 [[OFFSET]] +; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[GEP]] to i32 +; CHECK-NEXT: ret i32 [[ADDR]] +; + %ptr = inttoptr i64 %int to ptr addrspace(1) + %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset + %addr = ptrtoaddr ptr addrspace(1) %gep to i32 + ret i32 %addr +} + +define i64 @ptrtoaddr_of_gep_of_null(i64 %offset) { +; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_null( +; CHECK-SAME: i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: ret i64 [[OFFSET]] +; + %gep = getelementptr i8, ptr null, i64 %offset + %addr = ptrtoaddr ptr %gep to i64 + ret i64 %addr +} + +define i32 @ptrtoaddr_of_gep_of_null_addrsize(i32 %offset) { +; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_null_addrsize( +; CHECK-SAME: i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: ret i32 [[OFFSET]] +; + %gep = getelementptr i8, ptr addrspace(1) null, i32 %offset + %addr = ptrtoaddr ptr addrspace(1) %gep to i32 + ret i32 %addr +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll index 9f9e3f9..77a7f0d 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll @@ -1,26 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -; RUN: opt < %s -passes=instsimplify -use-constant-int-for-fixed-length-splat -S | FileCheck %s - -declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.mul.v1i32(<1 x i32> %a) -declare i32 
@llvm.vector.reduce.mul.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a) -declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a) -declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a) - +; RUN: opt < %s -passes=instsimplify -use-constant-int-for-fixed-length-splat -use-constant-int-for-scalable-splat -S | FileCheck %s define i32 @add_0() { ; CHECK-LABEL: @add_0( @@ -30,6 +10,15 @@ define i32 @add_0() { ret i32 %x } +define i32 @add_0_scalable_vector() { +; CHECK-LABEL: @add_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @add_1() { ; CHECK-LABEL: @add_1( ; CHECK-NEXT: ret i32 8 @@ -38,6 +27,15 @@ define i32 @add_1() { ret i32 %x } +define i32 @add_1_scalable_vector() { +; CHECK-LABEL: @add_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> splat (i32 1)) + ret i32 %x +} + define i32 @add_inc() { ; CHECK-LABEL: @add_inc( ; CHECK-NEXT: ret i32 18 @@ -63,8 +61,17 @@ define i32 @add_undef() { ret i32 %x } -define i32 @add_undef1() { -; 
CHECK-LABEL: @add_undef1( +define i32 @add_undef_scalable_vector() { +; CHECK-LABEL: @add_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @add_undef_elt() { +; CHECK-LABEL: @add_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -80,8 +87,17 @@ define i32 @add_poison() { ret i32 %x } -define i32 @add_poison1() { -; CHECK-LABEL: @add_poison1( +define i32 @add_poison_scalable_vector() { +; CHECK-LABEL: @add_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @add_poison_elt() { +; CHECK-LABEL: @add_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> <i32 1, i32 1, i32 poison, i32 1, i32 1, i32 42, i32 1, i32 1>) @@ -105,6 +121,15 @@ define i32 @mul_0() { ret i32 %x } +define i32 @mul_0_scalable_vector() { +; CHECK-LABEL: @mul_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @mul_1() { ; CHECK-LABEL: @mul_1( ; CHECK-NEXT: ret i32 1 @@ -113,6 +138,15 @@ define i32 @mul_1() { ret i32 %x } +define i32 @mul_1_scalable_vector() { +; CHECK-LABEL: @mul_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1)) + 
ret i32 %x +} + define i32 @mul_inc() { ; CHECK-LABEL: @mul_inc( ; CHECK-NEXT: ret i32 40320 @@ -138,8 +172,17 @@ define i32 @mul_undef() { ret i32 %x } -define i32 @mul_undef1() { -; CHECK-LABEL: @mul_undef1( +define i32 @mul_undef_scalable_vector() { +; CHECK-LABEL: @mul_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @mul_undef_elt() { +; CHECK-LABEL: @mul_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -155,8 +198,17 @@ define i32 @mul_poison() { ret i32 %x } -define i32 @mul_poison1() { -; CHECK-LABEL: @mul_poison1( +define i32 @mul_poison_scalable_vector() { +; CHECK-LABEL: @mul_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @mul_poison_elt() { +; CHECK-LABEL: @mul_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 0, i32 1, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>) @@ -171,6 +223,15 @@ define i32 @and_0() { ret i32 %x } +define i32 @and_0_scalable_vector() { +; CHECK-LABEL: @and_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @and_1() { ; CHECK-LABEL: @and_1( ; CHECK-NEXT: ret i32 1 @@ -179,6 +240,15 @@ define i32 @and_1() { ret i32 %x } +define i32 @and_1_scalable_vector() { +; CHECK-LABEL: @and_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 
@llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1)) + ret i32 %x +} + define i32 @and_inc() { ; CHECK-LABEL: @and_inc( ; CHECK-NEXT: ret i32 0 @@ -204,8 +274,17 @@ define i32 @and_undef() { ret i32 %x } -define i32 @and_undef1() { -; CHECK-LABEL: @and_undef1( +define i32 @and_undef_scalable_vector() { +; CHECK-LABEL: @and_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @and_undef_elt() { +; CHECK-LABEL: @and_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -221,8 +300,17 @@ define i32 @and_poison() { ret i32 %x } -define i32 @and_poison1() { -; CHECK-LABEL: @and_poison1( +define i32 @and_poison_scalable_vector() { +; CHECK-LABEL: @and_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @and_poison_elt() { +; CHECK-LABEL: @and_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> <i32 -1, i32 1, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>) @@ -237,6 +325,15 @@ define i32 @or_0() { ret i32 %x } +define i32 @or_0_scalable_vector() { +; CHECK-LABEL: @or_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @or_1() { ; CHECK-LABEL: @or_1( ; CHECK-NEXT: ret i32 
1 @@ -245,6 +342,15 @@ define i32 @or_1() { ret i32 %x } +define i32 @or_1_scalable_vector() { +; CHECK-LABEL: @or_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1)) + ret i32 %x +} + define i32 @or_inc() { ; CHECK-LABEL: @or_inc( ; CHECK-NEXT: ret i32 -1 @@ -270,8 +376,17 @@ define i32 @or_undef() { ret i32 %x } -define i32 @or_undef1() { -; CHECK-LABEL: @or_undef1( +define i32 @or_undef_scalable_vector() { +; CHECK-LABEL: @or_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @or_undef_elt() { +; CHECK-LABEL: @or_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -287,8 +402,17 @@ define i32 @or_poison() { ret i32 %x } -define i32 @or_poison1() { -; CHECK-LABEL: @or_poison1( +define i32 @or_poison_scalable_vector() { +; CHECK-LABEL: @or_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @or_poison_elt() { +; CHECK-LABEL: @or_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> <i32 1, i32 0, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>) @@ -303,6 +427,15 @@ define i32 @xor_0() { ret i32 %x } +define i32 @xor_0_scalable_vector() { +; CHECK-LABEL: @xor_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 
@llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @xor_1() { ; CHECK-LABEL: @xor_1( ; CHECK-NEXT: ret i32 0 @@ -311,6 +444,15 @@ define i32 @xor_1() { ret i32 %x } +define i32 @xor_1_scalable_vector() { +; CHECK-LABEL: @xor_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat(i32 1)) + ret i32 %x +} + define i32 @xor_inc() { ; CHECK-LABEL: @xor_inc( ; CHECK-NEXT: ret i32 10 @@ -336,8 +478,17 @@ define i32 @xor_undef() { ret i32 %x } -define i32 @xor_undef1() { -; CHECK-LABEL: @xor_undef1( +define i32 @xor_undef_scalable_vector() { +; CHECK-LABEL: @xor_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @xor_undef_elt() { +; CHECK-LABEL: @xor_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -353,8 +504,17 @@ define i32 @xor_poison() { ret i32 %x } -define i32 @xor_poison1() { -; CHECK-LABEL: @xor_poison1( +define i32 @xor_poison_scalable_vector() { +; CHECK-LABEL: @xor_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @xor_poison_elt() { +; CHECK-LABEL: @xor_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> <i32 poison, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) @@ -369,6 +529,15 @@ define i32 @smin_0() { ret i32 %x } +define i32 @smin_0_scalable_vector() { +; 
CHECK-LABEL: @smin_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @smin_1() { ; CHECK-LABEL: @smin_1( ; CHECK-NEXT: ret i32 1 @@ -377,6 +546,15 @@ define i32 @smin_1() { ret i32 %x } +define i32 @smin_1_scalable_vector() { +; CHECK-LABEL: @smin_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat(i32 1)) + ret i32 %x +} + define i32 @smin_inc() { ; CHECK-LABEL: @smin_inc( ; CHECK-NEXT: ret i32 -6 @@ -402,8 +580,17 @@ define i32 @smin_undef() { ret i32 %x } -define i32 @smin_undef1() { -; CHECK-LABEL: @smin_undef1( +define i32 @smin_undef_scalable_vector() { +; CHECK-LABEL: @smin_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @smin_undef_elt() { +; CHECK-LABEL: @smin_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -419,8 +606,17 @@ define i32 @smin_poison() { ret i32 %x } -define i32 @smin_poison1() { -; CHECK-LABEL: @smin_poison1( +define i32 @smin_poison_scalable_vector() { +; CHECK-LABEL: @smin_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @smin_poison_elt() { +; CHECK-LABEL: @smin_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = 
call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 poison, i32 1, i32 1, i32 1>) @@ -435,6 +631,15 @@ define i32 @smax_0() { ret i32 %x } +define i32 @smax_0_scalable_vector() { +; CHECK-LABEL: @smax_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @smax_1() { ; CHECK-LABEL: @smax_1( ; CHECK-NEXT: ret i32 1 @@ -443,6 +648,15 @@ define i32 @smax_1() { ret i32 %x } +define i32 @smax_1_scalable_vector() { +; CHECK-LABEL: @smax_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat(i32 1)) + ret i32 %x +} + define i32 @smax_inc() { ; CHECK-LABEL: @smax_inc( ; CHECK-NEXT: ret i32 8 @@ -468,8 +682,17 @@ define i32 @smax_undef() { ret i32 %x } -define i32 @smax_undef1() { -; CHECK-LABEL: @smax_undef1( +define i32 @smax_undef_scalable_vector() { +; CHECK-LABEL: @smax_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @smax_undef_elt() { +; CHECK-LABEL: @smax_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -485,8 +708,17 @@ define i32 @smax_poison() { ret i32 %x } -define i32 @smax_poison1() { -; CHECK-LABEL: @smax_poison1( +define i32 @smax_poison_scalable_vector() { +; CHECK-LABEL: @smax_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison) +; 
CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @smax_poison_elt() { +; CHECK-LABEL: @smax_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> <i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1, i32 poison>) @@ -501,6 +733,15 @@ define i32 @umin_0() { ret i32 %x } +define i32 @umin_0_scalable_vector() { +; CHECK-LABEL: @umin_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @umin_1() { ; CHECK-LABEL: @umin_1( ; CHECK-NEXT: ret i32 1 @@ -509,6 +750,15 @@ define i32 @umin_1() { ret i32 %x } +define i32 @umin_1_scalable_vector() { +; CHECK-LABEL: @umin_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1)) + ret i32 %x +} + define i32 @umin_inc() { ; CHECK-LABEL: @umin_inc( ; CHECK-NEXT: ret i32 1 @@ -534,8 +784,17 @@ define i32 @umin_undef() { ret i32 %x } -define i32 @umin_undef1() { -; CHECK-LABEL: @umin_undef1( +define i32 @umin_undef_scalable_vector() { +; CHECK-LABEL: @umin_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @umin_undef_elt() { +; CHECK-LABEL: @umin_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -551,8 +810,17 @@ define i32 @umin_poison() { ret i32 %x } -define i32 @umin_poison1() { -; 
CHECK-LABEL: @umin_poison1( +define i32 @umin_poison_scalable_vector() { +; CHECK-LABEL: @umin_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @umin_poison_elt() { +; CHECK-LABEL: @umin_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> <i32 1, i32 1, i32 -1, i32 poison, i32 1, i32 1, i32 1, i32 1>) @@ -567,6 +835,15 @@ define i32 @umax_0() { ret i32 %x } +define i32 @umax_0_scalable_vector() { +; CHECK-LABEL: @umax_0_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer) + ret i32 %x +} + define i32 @umax_1() { ; CHECK-LABEL: @umax_1( ; CHECK-NEXT: ret i32 1 @@ -575,6 +852,15 @@ define i32 @umax_1() { ret i32 %x } +define i32 @umax_1_scalable_vector() { +; CHECK-LABEL: @umax_1_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat(i32 1)) + ret i32 %x +} + define i32 @umax_inc() { ; CHECK-LABEL: @umax_inc( ; CHECK-NEXT: ret i32 -3 @@ -600,8 +886,17 @@ define i32 @umax_undef() { ret i32 %x } -define i32 @umax_undef1() { -; CHECK-LABEL: @umax_undef1( +define i32 @umax_undef_scalable_vector() { +; CHECK-LABEL: @umax_undef_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> undef) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> undef) + ret i32 %x +} + +define i32 @umax_undef_elt() { +; CHECK-LABEL: @umax_undef_elt( ; CHECK-NEXT: [[X:%.*]] = call i32 
@llvm.vector.reduce.umax.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>) ; CHECK-NEXT: ret i32 [[X]] ; @@ -617,8 +912,17 @@ define i32 @umax_poison() { ret i32 %x } -define i32 @umax_poison1() { -; CHECK-LABEL: @umax_poison1( +define i32 @umax_poison_scalable_vector() { +; CHECK-LABEL: @umax_poison_scalable_vector( +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-NEXT: ret i32 [[X]] +; + %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison) + ret i32 %x +} + +define i32 @umax_poison_elt() { +; CHECK-LABEL: @umax_poison_elt( ; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> <i32 1, i32 1, i32 poison, i32 1, i32 1, i32 poison, i32 1, i32 1>) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 964a257..fafa82c 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -2800,6 +2800,88 @@ exit: ret i64 %r.0.lcssa } +define i32 @reduction_expression_ext_mulacc_livein(ptr %a, i16 %c) { +; CHECK-LABEL: define i32 @reduction_expression_ext_mulacc_livein( +; CHECK-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[FOR_EXIT:.*]] +; CHECK: [[FOR_EXIT]]: +; CHECK-NEXT: ret i32 [[TMP5]] +; +; CHECK-INTERLEAVED-LABEL: define i32 @reduction_expression_ext_mulacc_livein( +; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) { +; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*:]] +; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-INTERLEAVED: [[VECTOR_PH]]: +; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0 +; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INTERLEAVED: [[VECTOR_BODY]]: +; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; 
CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16> +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP2]] +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-INTERLEAVED-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP8]] +; CHECK-INTERLEAVED-NEXT: br label %[[FOR_EXIT:.*]] +; CHECK-INTERLEAVED: [[FOR_EXIT]]: +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %gep.a = getelementptr i8, ptr %a, i64 %iv + %load.a = load i8, ptr %gep.a, align 1 + %ext.a = zext i8 %load.a to i16 + %mul = mul i16 %c, %ext.a + %mul.ext = zext i16 %mul to i32 + %add = add i32 %mul.ext, %accum + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.exit, label %for.body + +for.exit: ; preds = 
%for.body + ret i32 %add +} + declare float @llvm.fmuladd.f32(float, float, float) !6 = distinct !{!6, !7, !8} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 06b0448..291ada8 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -800,3 +800,545 @@ exit: %r.0.lcssa = phi i64 [ %rdx.next, %loop ] ret i64 %r.0.lcssa } + +define i32 @print_mulacc_extended_const(ptr %start, ptr %end) { +; CHECK-LABEL: 'print_mulacc_extended_const' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF +; CHECK-NEXT: Live-in vp<%1> = VF * UF +; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: vp<%3> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> +; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> +; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> +; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32)) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: 
Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 +; CHECK-NEXT: IR %red.next = add i32 %red, %mul +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in ir<%1> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 +; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 +; CHECK-NEXT: IR %0 = add i64 %end1, 1 +; CHECK-NEXT: IR %1 = sub i64 %0, %start2 +; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> +; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> +; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT 
vp<%n.mod.vf> = urem ir<%1>, ir<4> +; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32 +; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<63> +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> +; CHECK-NEXT: Successor(s): middle.block, vector.body +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<scalar.ph>: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR 
%l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 +; CHECK-NEXT: IR %red.next = add i32 %red, %mul +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = zext i8 %l to i32 + %mul = mul i32 %l.ext, 63 + %red.next = add i32 %red, %mul + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %red.next +} + +; Constants >= 128 cannot be treated as sign-extended, so the expression shouldn't extend 128 +define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) { +; CHECK-LABEL: 'print_mulacc_not_extended_const' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF +; CHECK-NEXT: Live-in vp<%1> = VF * UF +; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: vp<%3> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> +; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> +; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> +; CHECK-NEXT: vp<%8> = 
vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 +; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul ir<%l.ext>, ir<128>) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 +; CHECK-NEXT: IR %red.next = add i32 %red, %mul +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in ir<%1> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: IR %start2 = 
ptrtoint ptr %start to i64 +; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 +; CHECK-NEXT: IR %0 = add i64 %end1, 1 +; CHECK-NEXT: IR %1 = sub i64 %0, %start2 +; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> +; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> +; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> +; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 +; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> +; CHECK-NEXT: Successor(s): middle.block, vector.body +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<scalar.ph>: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block 
], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 +; CHECK-NEXT: IR %red.next = add i32 %red, %mul +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = sext i8 %l to i32 + %mul = mul i32 %l.ext, 128 + %red.next = add i32 %red, %mul + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + %red.next.lcssa = phi i32 [ %red.next, %loop ] + ret i32 %red.next.lcssa +} + +define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) { +; CHECK-LABEL: 'print_ext_mulacc_extended_const' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF +; CHECK-NEXT: Live-in vp<%1> = VF * UF +; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: vp<%3> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> +; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop 
+; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> +; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> +; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64)) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 +; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64 +; CHECK-NEXT: IR 
%red.next = add i64 %red, %mul.ext +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in ir<%1> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 +; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 +; CHECK-NEXT: IR %0 = add i64 %end1, 1 +; CHECK-NEXT: IR %1 = sub i64 %0, %start2 +; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> +; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> +; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> +; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> +; CHECK-NEXT: WIDEN-CAST vp<%4> = zext ir<%l> to i64 +; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%4>, ir<63> +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> +; CHECK-NEXT: Successor(s): middle.block, vector.body +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: 
+; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%6> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<scalar.ph>: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%6>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = zext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63 +; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64 +; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = zext i8 %l to i32 + %mul = mul i32 %l.ext, 63 + %mul.ext = zext i32 %mul to i64 + %red.next = add i64 %red, %mul.ext + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret i64 %red.next +} + +; Constants >= 128 cannot be treated as sign-extended, so the expression shouldn't extend 128 +define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) { +; CHECK-LABEL: 'print_ext_mulacc_not_extended_const' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF +; CHECK-NEXT: Live-in vp<%1> = VF * UF +; CHECK-NEXT: 
Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: vp<%3> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1> +; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9> +; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7> +; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%8> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 +; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128> +; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (ir<%mul> sext to i64) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ 
vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 +; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64 +; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' { +; CHECK-NEXT: Live-in ir<%1> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64 +; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64 +; CHECK-NEXT: IR %0 = add i64 %end1, 1 +; CHECK-NEXT: IR %1 = sub i64 %0, %start2 +; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4> +; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check> +; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4> +; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf> +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1> +; CHECK-NEXT: Successor(s): vector.body +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index> +; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep> +; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 +; CHECK-NEXT: WIDEN ir<%mul> = mul 
ir<%l.ext>, ir<128> +; CHECK-NEXT: WIDEN-CAST ir<%mul.ext> = sext ir<%mul> to i64 +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul.ext>) +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> +; CHECK-NEXT: Successor(s): middle.block, vector.body +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<scalar.ph>: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) +; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 +; CHECK-NEXT: IR %l.ext = sext i8 %l to i32 +; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128 +; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64 +; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext +; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 +; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] + %l = load 
i8, ptr %ptr.iv, align 1 + %l.ext = sext i8 %l to i32 + %mul = mul i32 %l.ext, 128 + %mul.ext = sext i32 %mul to i64 + %red.next = add i64 %red, %mul.ext + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + %red.next.lcssa = phi i64 [ %red.next, %loop ] + ret i64 %red.next.lcssa +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll new file mode 100644 index 0000000..9e96e93 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i) { +; CHECK-LABEL: define i64 @test( +; CHECK-SAME: ptr [[ARG1:%.*]], i64 [[ALLOCA_PROMOTED344:%.*]], i8 [[LOAD_311_I:%.*]], i1 [[LOAD1_I:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[LOAD_311_I]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 0 +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[BB]] ], [ [[TMP28:%.*]], %[[BB12_8_I:.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP29:%.*]], %[[BB12_8_I]] ] +; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[SPAM_EXIT:.*]], label %[[BB4_LR_PH_I:.*]] +; CHECK: [[BB4_LR_PH_I]]: +; CHECK-NEXT: br i1 true, label %[[BB3_I_I_PEEL:.*]], label %[[EGGS_EXIT_I_PEEL:.*]] +; CHECK: 
[[BB3_I_I_PEEL]]: +; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], splat (i64 1) +; CHECK-NEXT: [[LOAD4_I_I_PEEL:%.*]] = load i64, ptr [[ARG1]], align 8 +; CHECK-NEXT: [[SHL_I_I_PEEL:%.*]] = shl i64 [[LOAD4_I_I_PEEL]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 poison, i32 0> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: br label %[[EGGS_EXIT_I_PEEL]] +; CHECK: [[EGGS_EXIT_I_PEEL]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i64> [ [[TMP10]], %[[BB3_I_I_PEEL]] ], [ zeroinitializer, %[[BB4_LR_PH_I]] ] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0> +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: br label %[[SPAM_EXIT]] +; CHECK: [[SPAM_EXIT]]: +; CHECK-NEXT: [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[TMP14]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ] +; CHECK-NEXT: [[LOAD_8_I:%.*]] = phi i8 [ 0, %[[EGGS_EXIT_I_PEEL]] ], [ 1, %[[BB2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP13]], %[[EGGS_EXIT_I_PEEL]] ], [ zeroinitializer, %[[BB2]] ] +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[BB12_8_I]], label %[[BB12_1_THREAD_I:.*]] +; CHECK: [[BB12_1_THREAD_I]]: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0 +; CHECK-NEXT: [[ICMP5_3_I:%.*]] = icmp eq i8 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[ICMP5_3_I]], label %[[BB12_3_I:.*]], label %[[BB8_3_I:.*]] +; CHECK: [[BB8_3_I]]: +; CHECK-NEXT: br 
label %[[BB12_3_I]] +; CHECK: [[BB12_3_I]]: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1 +; CHECK-NEXT: [[ICMP5_4_I:%.*]] = icmp eq i8 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[ICMP5_4_I]], label %[[BB12_4_I:.*]], label %[[BB8_4_I:.*]] +; CHECK: [[BB8_4_I]]: +; CHECK-NEXT: br label %[[BB12_4_I]] +; CHECK: [[BB12_4_I]]: +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 2 +; CHECK-NEXT: [[ICMP5_5_I:%.*]] = icmp eq i8 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[ICMP5_5_I]], label %[[BB12_5_I:.*]], label %[[BB8_5_I:.*]] +; CHECK: [[BB8_5_I]]: +; CHECK-NEXT: br label %[[BB12_5_I]] +; CHECK: [[BB12_5_I]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3 +; CHECK-NEXT: [[ICMP5_7_I:%.*]] = icmp eq i8 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[ICMP5_7_I]], label %[[BB12_7_I:.*]], label %[[BB8_7_I:.*]] +; CHECK: [[BB8_7_I]]: +; CHECK-NEXT: br label %[[BB12_7_I]] +; CHECK: [[BB12_7_I]]: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 4 +; CHECK-NEXT: [[ICMP5_8_I:%.*]] = icmp eq i8 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[ICMP5_8_I]], label %[[BB12_8_I]], label %[[BB8_8_I:.*]] +; CHECK: [[BB8_8_I]]: +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i8> poison, i8 [[LOAD_8_I]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <4 x i32> <i32 poison, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT: br label %[[BB12_8_I]] +; CHECK: [[BB12_8_I]]: +; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP22]], %[[BB8_8_I]] ], [ [[TMP15]], %[[SPAM_EXIT]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ zeroinitializer, %[[BB12_7_I]] ], [ [[TMP25]], %[[BB8_8_I]] ], [ [[TMP16]], %[[SPAM_EXIT]] ] +; CHECK-NEXT: [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 
[[GETELEMENTPTR_I_I_PROMOTED346]], i32 1 +; CHECK-NEXT: [[TMP29]] = shufflevector <4 x i8> [[TMP26]], <4 x i8> [[TMP27]], <8 x i32> <i32 2, i32 7, i32 5, i32 0, i32 1, i32 3, i32 4, i32 6> +; CHECK-NEXT: br label %[[BB2]] +; +bb: + br label %bb2 + +bb2: + %getelementptr.i.i.promoted = phi i64 [ 0, %bb ], [ %getelementptr.i.i.promoted346, %bb12.8.i ] + %alloca.promoted = phi i64 [ 0, %bb ], [ %alloca.promoted344, %bb12.8.i ] + %load.8.i231 = phi i8 [ 0, %bb ], [ %load.8.i239, %bb12.8.i ] + %load.7.i217 = phi i8 [ 0, %bb ], [ %load.7.i225, %bb12.8.i ] + %load.626.i200 = phi i8 [ 0, %bb ], [ %load.626.i208, %bb12.8.i ] + %load.6.i183 = phi i8 [ 0, %bb ], [ %load.6.i191, %bb12.8.i ] + %load.5.i167 = phi i8 [ 0, %bb ], [ %load.5.i175, %bb12.8.i ] + %load.418.i148 = phi i8 [ 0, %bb ], [ %load.418.i156, %bb12.8.i ] + %load.4.i129 = phi i8 [ 0, %bb ], [ %load.4.i137, %bb12.8.i ] + %load.3.i111 = phi i8 [ 0, %bb ], [ %load.3.i119, %bb12.8.i ] + br i1 %load1.i, label %spam.exit, label %bb4.lr.ph.i + +bb4.lr.ph.i: + br i1 true, label %bb3.i.i.peel, label %eggs.exit.i.peel + +bb3.i.i.peel: + %and.i.i.peel = and i64 %alloca.promoted, 1 + %load4.i.i.peel = load i64, ptr %arg1, align 8 + %shl.i.i.peel = shl i64 %load4.i.i.peel, 1 + %or.i.i.peel = or i64 %shl.i.i.peel, %and.i.i.peel + %and6.i.i.peel = and i64 %getelementptr.i.i.promoted, 1 + %xor.i.i.peel = xor i64 %and6.i.i.peel, %alloca.promoted + br label %eggs.exit.i.peel + +eggs.exit.i.peel: + %load5.i.i93.peel = phi i64 [ %xor.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ] + %or.i.i91.peel = phi i64 [ %or.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ] + %0 = trunc i64 %or.i.i91.peel to i8 + %1 = trunc nuw i64 %or.i.i91.peel to i8 + %2 = trunc i64 %load5.i.i93.peel to i8 + br label %spam.exit + +spam.exit: + %getelementptr.i.i.promoted346 = phi i64 [ %load5.i.i93.peel, %eggs.exit.i.peel ], [ 0, %bb2 ] + %load.834.i = phi i8 [ %2, %eggs.exit.i.peel ], [ 0, %bb2 ] + %load.7.i25 = phi i8 [ %1, %eggs.exit.i.peel ], [ 0, %bb2 ] + 
%load.8.i = phi i8 [ 0, %eggs.exit.i.peel ], [ 1, %bb2 ] + %load.6.i18 = phi i8 [ %0, %eggs.exit.i.peel ], [ 0, %bb2 ] + br i1 %load1.i, label %bb12.8.i, label %bb12.1.thread.i + +bb12.1.thread.i: + %icmp5.3.i = icmp eq i8 %load.3.i111, 0 + br i1 %icmp5.3.i, label %bb12.3.i, label %bb8.3.i + +bb8.3.i: + br label %bb12.3.i + +bb12.3.i: + %icmp5.4.i = icmp eq i8 %load.4.i129, 0 + br i1 %icmp5.4.i, label %bb12.4.i, label %bb8.4.i + +bb8.4.i: + br label %bb12.4.i + +bb12.4.i: + %icmp5.5.i = icmp eq i8 %load.5.i167, 0 + br i1 %icmp5.5.i, label %bb12.5.i, label %bb8.5.i + +bb8.5.i: + br label %bb12.5.i + +bb12.5.i: + %icmp5.7.i = icmp eq i8 %load.7.i217, 0 + br i1 %icmp5.7.i, label %bb12.7.i, label %bb8.7.i + +bb8.7.i: + br label %bb12.7.i + +bb12.7.i: + %icmp5.8.i = icmp eq i8 %load.8.i231, 0 + br i1 %icmp5.8.i, label %bb12.8.i, label %bb8.8.i + +bb8.8.i: + br label %bb12.8.i + +bb12.8.i: + %load.8.i239 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.834.i, %spam.exit ] + %load.7.i225 = phi i8 [ 0, %bb12.7.i ], [ %load.311.i, %bb8.8.i ], [ %load.7.i25, %spam.exit ] + %load.626.i208 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.6.i191 = phi i8 [ %load.311.i, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.5.i175 = phi i8 [ 0, %bb12.7.i ], [ %load.6.i183, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.418.i156 = phi i8 [ 0, %bb12.7.i ], [ %load.626.i200, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.4.i137 = phi i8 [ 0, %bb12.7.i ], [ %load.418.i148, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.3.i119 = phi i8 [ 0, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + br label %bb2 +} diff --git a/llvm/test/Transforms/SimplifyCFG/pr165088.ll b/llvm/test/Transforms/SimplifyCFG/pr165088.ll new file mode 100644 index 0000000..4514a19 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/pr165088.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 6 +; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s + +; Avoid getting stuck in the cycle pr165088_cycle_[1-4]. + +; cycle_1: entry branches on the AND of two range tests on %x (uge 2 and ugt 1); the expected output uses a single 'icmp ult %x, 2' with the two successors swapped. +define void @pr165088_cycle_1(i8 %x) { +; CHECK-LABEL: define void @pr165088_cycle_1( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2 +; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; CHECK: [[BLOCK1:.*]]: +; CHECK-NEXT: [[COND2:%.*]] = icmp ugt i8 [[X]], 1 +; CHECK-NEXT: br i1 [[COND2]], label %[[BLOCK3]], label %[[BLOCK2]] +; CHECK: [[BLOCK2]]: +; CHECK-NEXT: br label %[[BLOCK3]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %switch = icmp uge i8 %x, 2 + %cond1 = icmp ugt i8 %x, 1 + %or.cond = and i1 %switch, %cond1 + br i1 %or.cond, label %block3, label %block2 + +block1: + %cond2 = icmp ugt i8 %x, 1 + br i1 %cond2, label %block3, label %block2 + +block2: + br label %block3 + +block3: + %cond3 = icmp eq i8 %x, 0 + br i1 %cond3, label %exit, label %block1 + +exit: + ret void +} + +; cycle_2: entry is a switch sending cases 0 and 1 to block2 and the default to block3; the expected output replaces the switch with 'icmp ult %x, 2'. +define void @pr165088_cycle_2(i8 %x) { +; CHECK-LABEL: define void @pr165088_cycle_2( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[X]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; CHECK: [[BLOCK1:.*]]: +; CHECK-NEXT: [[COND2:%.*]] = icmp ugt i8 [[X]], 1 +; CHECK-NEXT: br i1 [[COND2]], label %[[BLOCK3]], label %[[BLOCK2]] +; CHECK: [[BLOCK2]]: +; CHECK-NEXT: br label %[[BLOCK3]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + switch i8 %x, label %block3 [ + i8 1, label %block2 + i8 0, label %block2 + ] + +block1: ; preds = %block3 + %cond2 = 
icmp ugt i8 %x, 1 + br i1 %cond2, label %block3, label %block2 + +block2: ; preds = %entry, %entry, %block1 + br label %block3 + +block3: ; preds = %entry, %block2, %block1 + %cond3 = icmp eq i8 %x, 0 + br i1 %cond3, label %exit, label %block1 + +exit: ; preds = %block3 + ret void +} + +; cycle_3: same switch cases as cycle_2 but the default goes to block1; the expected output is a single block3 that branches to exit when %x == 0 and back to itself otherwise. +define void @pr165088_cycle_3(i8 %x) { +; CHECK-LABEL: define void @pr165088_cycle_3( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[BLOCK3:.*]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK3]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + switch i8 %x, label %block1 [ + i8 1, label %block2 + i8 0, label %block2 + ] + +block1: ; preds = %entry, %block3 + %cond2 = icmp ugt i8 %x, 1 + br i1 %cond2, label %block3, label %block2 + +block2: ; preds = %entry, %entry, %block1 + br label %block3 + +block3: ; preds = %block2, %block1 + %cond3 = icmp eq i8 %x, 0 + br i1 %cond3, label %exit, label %block1 + +exit: ; preds = %block3 + ret void +} + +; cycle_4: entry already tests 'icmp ult %x, 2' but its false edge targets block1; in the expected output that edge targets block3 instead. +define void @pr165088_cycle_4(i8 %x) { +; CHECK-LABEL: define void @pr165088_cycle_4( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2 +; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; CHECK: [[BLOCK1:.*]]: +; CHECK-NEXT: [[COND2_OLD:%.*]] = icmp ugt i8 [[X]], 1 +; CHECK-NEXT: br i1 [[COND2_OLD]], label %[[BLOCK3]], label %[[BLOCK2]] +; CHECK: [[BLOCK2]]: +; CHECK-NEXT: br label %[[BLOCK3]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %switch = icmp ult i8 %x, 2 + br i1 %switch, label %block2, label %block1 + +block1: ; preds = %entry, %block3 + %cond2 = icmp ugt i8 %x, 1 + br i1 %cond2, label %block3, label %block2 + +block2: ; preds = %entry, %block1 + br label 
%block3 + +block3: ; preds = %block2, %block1 + %cond3 = icmp eq i8 %x, 0 + br i1 %cond3, label %exit, label %block1 + +exit: ; preds = %block3 + ret void +} + +; original: entry branches on the AND of two identical 'icmp ne %x, 0' tests; the expected output has the same shape as for cycle_1 and cycle_4. NOTE(review): presumably the reproducer from the bug report, going by the name - confirm. +define void @pr165088_original(i8 %x) { +; CHECK-LABEL: define void @pr165088_original( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2 +; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; CHECK: [[BLOCK1:.*]]: +; CHECK-NEXT: [[COND3_OLD_OLD:%.*]] = icmp ugt i8 [[X]], 1 +; CHECK-NEXT: br i1 [[COND3_OLD_OLD]], label %[[BLOCK3]], label %[[BLOCK2]] +; CHECK: [[BLOCK2]]: +; CHECK-NEXT: br label %[[BLOCK3]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[COND4:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND4]], label %[[EXIT:.*]], label %[[BLOCK1]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cond = icmp ne i8 %x, 0 + %cond3 = icmp ne i8 %x, 0 + %or.cond = and i1 %cond, %cond3 + br i1 %or.cond, label %block3, label %block2 + +block1: ; preds = %block3 + %cond3.old = icmp ugt i8 %x, 1 + br i1 %cond3.old, label %block3, label %block2 + +block2: ; preds = %block1, %entry + br label %block3 + +block3: ; preds = %block2, %block1, %entry + %cond4 = icmp eq i8 %x, 0 + br i1 %cond4, label %exit, label %block1 + +exit: ; preds = %block3 + ret void +} |
