diff options
Diffstat (limited to 'llvm/test/Transforms')
5 files changed, 455 insertions, 80 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index 649e34e..7548bf6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -45,9 +45,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 8) ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 4, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY2:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 8, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 12, i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY1:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ] @@ -67,17 +64,11 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-UF4-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4 -; CHECK-UF4-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8 -; CHECK-UF4-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12) ; CHECK-UF4-NEXT: [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8) ; CHECK-UF4-NEXT: [[TMP10]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT7:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP13]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT8:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP14]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true ; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index 5ee4e9e..75acbea9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -46,23 +46,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48) ; CHECK-UF4-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32) ; CHECK-UF4-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16) ; CHECK-UF4-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -103,23 +91,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] -; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4 -; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] -; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5 -; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]] -; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48 -; CHECK-UF4-NEXT: [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48) ; CHECK-UF4-NEXT: [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32) ; CHECK-UF4-NEXT: [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16) ; CHECK-UF4-NEXT: [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0 ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -191,23 +167,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0 -; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] -; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] -; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6) ; CHECK-UF4-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2) ; CHECK-UF4-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -248,23 +212,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] -; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1 -; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] -; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2 -; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6 -; CHECK-UF4-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6) ; CHECK-UF4-NEXT: [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2) ; CHECK-UF4-NEXT: [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0 ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true ; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll index af57967..b63ab8f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll @@ -22,7 +22,6 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll new file mode 100644 index 0000000..caf7a80 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll @@ -0,0 +1,447 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -O3 < %s | FileCheck %s + +target datalayout = "n64:32:16:8" + +define i8 @test(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[X_:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 52) +; CHECK-NEXT: ret i8 [[X_]] +; +start: + %_0 = alloca [1 x i8], align 1 + %0 = icmp eq i8 %x, 0 + br i1 %0, label %bb1, label %bb2 + +bb1: ; preds = %start + store i8 0, ptr %_0, align 1 + br label %bb105 + +bb2: ; preds = %start + %1 = icmp eq i8 %x, 1 + br i1 %1, label %bb3, label %bb4 + +bb105: ; preds = %bb104, %bb103, %bb101, %bb99, %bb97, %bb95, %bb93, %bb91, %bb89, %bb87, %bb85, %bb83, %bb81, %bb79, %bb77, %bb75, %bb73, %bb71, %bb69, %bb67, %bb65, %bb63, %bb61, %bb59, %bb57, %bb55, %bb53, %bb51, %bb49, %bb47, %bb45, %bb43, %bb41, %bb39, %bb37, %bb35, %bb33, %bb31, %bb29, %bb27, %bb25, %bb23, %bb21, %bb19, %bb17, %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1 + %2 = load i8, ptr %_0, align 1 + ret i8 %2 + +bb3: ; preds = %bb2 + store i8 1, ptr %_0, align 1 + br label %bb105 + +bb4: ; preds = %bb2 + %3 = icmp eq i8 %x, 2 + br i1 %3, label %bb5, label %bb6 + +bb5: ; preds = %bb4 + store i8 2, ptr %_0, align 1 + br label %bb105 + +bb6: ; preds = %bb4 + %4 = icmp eq i8 %x, 3 + br i1 %4, label %bb7, label %bb8 + +bb7: ; preds = %bb6 + store i8 3, ptr %_0, align 1 + br label %bb105 + +bb8: ; preds = %bb6 + %5 = icmp eq i8 %x, 4 + br i1 %5, label %bb9, label %bb10 + +bb9: ; preds = %bb8 + store i8 4, ptr %_0, align 1 + br label %bb105 + +bb10: ; preds = %bb8 + %6 = icmp eq i8 %x, 5 + br i1 %6, label %bb11, label %bb12 + +bb11: ; preds = %bb10 + store i8 5, ptr %_0, align 1 + br label %bb105 + +bb12: ; preds = %bb10 + %7 = icmp eq i8 %x, 6 + br i1 %7, label %bb13, label %bb14 + +bb13: ; preds = %bb12 + store i8 6, ptr %_0, align 1 + br label %bb105 + +bb14: ; preds = %bb12 + %8 = icmp eq i8 %x, 7 + br i1 %8, label %bb15, label %bb16 + +bb15: ; preds = %bb14 + store i8 7, ptr %_0, align 1 + br label %bb105 + +bb16: ; preds = %bb14 + %9 = icmp eq i8 %x, 8 + br i1 %9, label %bb17, label %bb18 + +bb17: ; preds = %bb16 + store i8 8, ptr %_0, align 1 + br label %bb105 + +bb18: ; preds = %bb16 + %10 = icmp eq i8 %x, 9 + br i1 %10, label %bb19, label %bb20 + +bb19: ; preds = %bb18 + store i8 9, ptr %_0, align 1 + br label %bb105 + +bb20: ; preds = %bb18 + %11 = icmp eq i8 %x, 10 + br i1 %11, label %bb21, label %bb22 + +bb21: ; preds = %bb20 + store i8 10, ptr %_0, align 1 + br label %bb105 + +bb22: ; preds = %bb20 + %12 = icmp eq i8 %x, 11 + br i1 %12, label %bb23, label %bb24 + +bb23: ; preds = %bb22 + store i8 11, ptr %_0, align 1 + br label %bb105 + +bb24: ; preds = %bb22 + %13 = icmp eq i8 %x, 12 + br i1 %13, label %bb25, label %bb26 + +bb25: ; preds = %bb24 + store i8 12, ptr %_0, align 1 + br label %bb105 + +bb26: ; preds = %bb24 + %14 = icmp eq i8 %x, 13 + br i1 %14, label %bb27, label %bb28 + +bb27: ; preds = %bb26 + store i8 13, ptr %_0, align 1 + br label %bb105 + +bb28: ; preds = %bb26 + %15 = icmp eq i8 %x, 14 + br i1 %15, label %bb29, label %bb30 + +bb29: ; preds = %bb28 + store i8 14, ptr %_0, align 1 + br label %bb105 + +bb30: ; preds = %bb28 + %16 = icmp eq i8 %x, 15 + br i1 %16, label %bb31, label %bb32 + +bb31: ; preds = %bb30 + store i8 15, ptr %_0, align 1 + br label %bb105 + +bb32: ; preds = %bb30 + %17 = icmp eq i8 %x, 16 + br i1 %17, label %bb33, label %bb34 + +bb33: ; preds = %bb32 + store i8 16, ptr %_0, align 1 + br label %bb105 + +bb34: ; preds = %bb32 + %18 = icmp eq i8 %x, 17 + br i1 %18, label %bb35, label %bb36 + +bb35: ; preds = %bb34 + store i8 17, ptr %_0, align 1 + br label %bb105 + +bb36: ; preds = %bb34 + %19 = icmp eq i8 %x, 18 + br i1 %19, label %bb37, label %bb38 + +bb37: ; preds = %bb36 + store i8 18, ptr %_0, align 1 + br label %bb105 + +bb38: ; preds = %bb36 + %20 = icmp eq i8 %x, 19 + br i1 %20, label %bb39, label %bb40 + +bb39: ; preds = %bb38 + store i8 19, ptr %_0, align 1 + br label %bb105 + +bb40: ; preds = %bb38 + %21 = icmp eq i8 %x, 20 + br i1 %21, label %bb41, label %bb42 + +bb41: ; preds = %bb40 + store i8 20, ptr %_0, align 1 + br label %bb105 + +bb42: ; preds = %bb40 + %22 = icmp eq i8 %x, 21 + br i1 %22, label %bb43, label %bb44 + +bb43: ; preds = %bb42 + store i8 21, ptr %_0, align 1 + br label %bb105 + +bb44: ; preds = %bb42 + %23 = icmp eq i8 %x, 22 + br i1 %23, label %bb45, label %bb46 + +bb45: ; preds = %bb44 + store i8 22, ptr %_0, align 1 + br label %bb105 + +bb46: ; preds = %bb44 + %24 = icmp eq i8 %x, 23 + br i1 %24, label %bb47, label %bb48 + +bb47: ; preds = %bb46 + store i8 23, ptr %_0, align 1 + br label %bb105 + +bb48: ; preds = %bb46 + %25 = icmp eq i8 %x, 24 + br i1 %25, label %bb49, label %bb50 + +bb49: ; preds = %bb48 + store i8 24, ptr %_0, align 1 + br label %bb105 + +bb50: ; preds = %bb48 + %26 = icmp eq i8 %x, 25 + br i1 %26, label %bb51, label %bb52 + +bb51: ; preds = %bb50 + store i8 25, ptr %_0, align 1 + br label %bb105 + +bb52: ; preds = %bb50 + %27 = icmp eq i8 %x, 26 + br i1 %27, label %bb53, label %bb54 + +bb53: ; preds = %bb52 + store i8 26, ptr %_0, align 1 + br label %bb105 + +bb54: ; preds = %bb52 + %28 = icmp eq i8 %x, 27 + br i1 %28, label %bb55, label %bb56 + +bb55: ; preds = %bb54 + store i8 27, ptr %_0, align 1 + br label %bb105 + +bb56: ; preds = %bb54 + %29 = icmp eq i8 %x, 28 + br i1 %29, label %bb57, label %bb58 + +bb57: ; preds = %bb56 + store i8 28, ptr %_0, align 1 + br label %bb105 + +bb58: ; preds = %bb56 + %30 = icmp eq i8 %x, 29 + br i1 %30, label %bb59, label %bb60 + +bb59: ; preds = %bb58 + store i8 29, ptr %_0, align 1 + br label %bb105 + +bb60: ; preds = %bb58 + %31 = icmp eq i8 %x, 30 + br i1 %31, label %bb61, label %bb62 + +bb61: ; preds = %bb60 + store i8 30, ptr %_0, align 1 + br label %bb105 + +bb62: ; preds = %bb60 + %32 = icmp eq i8 %x, 31 + br i1 %32, label %bb63, label %bb64 + +bb63: ; preds = %bb62 + store i8 31, ptr %_0, align 1 + br label %bb105 + +bb64: ; preds = %bb62 + %33 = icmp eq i8 %x, 32 + br i1 %33, label %bb65, label %bb66 + +bb65: ; preds = %bb64 + store i8 32, ptr %_0, align 1 + br label %bb105 + +bb66: ; preds = %bb64 + %34 = icmp eq i8 %x, 33 + br i1 %34, label %bb67, label %bb68 + +bb67: ; preds = %bb66 + store i8 33, ptr %_0, align 1 + br label %bb105 + +bb68: ; preds = %bb66 + %35 = icmp eq i8 %x, 34 + br i1 %35, label %bb69, label %bb70 + +bb69: ; preds = %bb68 + store i8 34, ptr %_0, align 1 + br label %bb105 + +bb70: ; preds = %bb68 + %36 = icmp eq i8 %x, 35 + br i1 %36, label %bb71, label %bb72 + +bb71: ; preds = %bb70 + store i8 35, ptr %_0, align 1 + br label %bb105 + +bb72: ; preds = %bb70 + %37 = icmp eq i8 %x, 36 + br i1 %37, label %bb73, label %bb74 + +bb73: ; preds = %bb72 + store i8 36, ptr %_0, align 1 + br label %bb105 + +bb74: ; preds = %bb72 + %38 = icmp eq i8 %x, 37 + br i1 %38, label %bb75, label %bb76 + +bb75: ; preds = %bb74 + store i8 37, ptr %_0, align 1 + br label %bb105 + +bb76: ; preds = %bb74 + %39 = icmp eq i8 %x, 38 + br i1 %39, label %bb77, label %bb78 + +bb77: ; preds = %bb76 + store i8 38, ptr %_0, align 1 + br label %bb105 + +bb78: ; preds = %bb76 + %40 = icmp eq i8 %x, 39 + br i1 %40, label %bb79, label %bb80 + +bb79: ; preds = %bb78 + store i8 39, ptr %_0, align 1 + br label %bb105 + +bb80: ; preds = %bb78 + %41 = icmp eq i8 %x, 40 + br i1 %41, label %bb81, label %bb82 + +bb81: ; preds = %bb80 + store i8 40, ptr %_0, align 1 + br label %bb105 + +bb82: ; preds = %bb80 + %42 = icmp eq i8 %x, 41 + br i1 %42, label %bb83, label %bb84 + +bb83: ; preds = %bb82 + store i8 41, ptr %_0, align 1 + br label %bb105 + +bb84: ; preds = %bb82 + %43 = icmp eq i8 %x, 42 + br i1 %43, label %bb85, label %bb86 + +bb85: ; preds = %bb84 + store i8 42, ptr %_0, align 1 + br label %bb105 + +bb86: ; preds = %bb84 + %44 = icmp eq i8 %x, 43 + br i1 %44, label %bb87, label %bb88 + +bb87: ; preds = %bb86 + store i8 43, ptr %_0, align 1 + br label %bb105 + +bb88: ; preds = %bb86 + %45 = icmp eq i8 %x, 44 + br i1 %45, label %bb89, label %bb90 + +bb89: ; preds = %bb88 + store i8 44, ptr %_0, align 1 + br label %bb105 + +bb90: ; preds = %bb88 + %46 = icmp eq i8 %x, 45 + br i1 %46, label %bb91, label %bb92 + +bb91: ; preds = %bb90 + store i8 45, ptr %_0, align 1 + br label %bb105 + +bb92: ; preds = %bb90 + %47 = icmp eq i8 %x, 46 + br i1 %47, label %bb93, label %bb94 + +bb93: ; preds = %bb92 + store i8 46, ptr %_0, align 1 + br label %bb105 + +bb94: ; preds = %bb92 + %48 = icmp eq i8 %x, 47 + br i1 %48, label %bb95, label %bb96 + +bb95: ; preds = %bb94 + store i8 47, ptr %_0, align 1 + br label %bb105 + +bb96: ; preds = %bb94 + %49 = icmp eq i8 %x, 48 + br i1 %49, label %bb97, label %bb98 + +bb97: ; preds = %bb96 + store i8 48, ptr %_0, align 1 + br label %bb105 + +bb98: ; preds = %bb96 + %50 = icmp eq i8 %x, 49 + br i1 %50, label %bb99, label %bb100 + +bb99: ; preds = %bb98 + store i8 49, ptr %_0, align 1 + br label %bb105 + +bb100: ; preds = %bb98 + %51 = icmp eq i8 %x, 50 + br i1 %51, label %bb101, label %bb102 + +bb101: ; preds = %bb100 + store i8 50, ptr %_0, align 1 + br label %bb105 + +bb102: ; preds = %bb100 + %52 = icmp eq i8 %x, 51 + br i1 %52, label %bb103, label %bb104 + +bb103: ; preds = %bb102 + store i8 51, ptr %_0, align 1 + br label %bb105 + +bb104: ; preds = %bb102 + store i8 52, ptr %_0, align 1 + br label %bb105 +} + +define i8 @test2(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test2( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i8 @test(i8 [[X]]) +; CHECK-NEXT: ret i8 [[CALL]] +; + %call = call i8 @test(i8 %x) + ret i8 %call +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index cf62fd5..a8880274 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -4,21 +4,14 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_sdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2 -; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4 ; CHECK-NEXT: ret void @@ -58,21 +51,14 @@ entry: define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_udiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2 -; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 ; CHECK-NEXT: ret void |