diff options
Diffstat (limited to 'llvm/test/Transforms')
17 files changed, 1214 insertions, 568 deletions
diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll index 3a306a4..ccef61d 100644 --- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -12,7 +12,7 @@ target triple = "sparcv9-unknown-unknown" define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-LABEL: @test_cmpxchg_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -45,7 +45,7 @@ define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i8 [[RET]] ; @@ -58,7 +58,7 @@ entry: define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-LABEL: @test_cmpxchg_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -91,7 +91,7 @@ define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[RET]] ; @@ -104,7 +104,7 @@ entry: define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_add_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -130,7 +130,7 @@ define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -141,7 +141,7 @@ entry: define i16 @test_xor_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_xor_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -164,7 +164,7 @@ define i16 @test_xor_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -175,7 +175,7 @@ entry: define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_or_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -198,7 +198,7 @@ define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -209,7 +209,7 @@ entry: define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_and_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -233,7 +233,7 @@ define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -244,7 +244,7 @@ entry: define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_min_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -272,7 +272,7 @@ define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; entry: @@ -282,7 +282,7 @@ entry: define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-LABEL: @test_atomicrmw_fadd_f16( -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 @@ -312,7 +312,7 @@ define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret half [[TMP8]] ; %res = atomicrmw fadd ptr %ptr, half %value seq_cst diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index e7b7dff..4173c32 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -1,11 +1,12 @@ ; REQUIRES: asserts ; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE ; This test checks that the analysis identifies all threadable paths in a ; simple CFG. A threadable path includes a list of basic blocks, the exit ; state, and the block that determines the next state. ; < path of BBs that form a cycle > [ state, determinator ] -define i32 @test1(i32 %num) { +define i32 @test1(i32 %num) !prof !0{ ; CHECK: < case2 for.inc for.body > [ 1, for.inc ] ; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] ; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ] @@ -25,8 +26,11 @@ case1: br label %for.inc case2: + ; PROFILE-LABEL: @test1 + ; PROFILE-LABEL: case2: + ; PROFILE: br i1 %cmp, label %for.inc.jt1, label %sel.si.unfold.false.jt2, !prof !1 ; !1 = !{!"branch_weights", i32 3, i32 5} %cmp = icmp eq i32 %count, 50 - %sel = select i1 %cmp, i32 1, i32 2 + %sel = select i1 %cmp, i32 1, i32 2, !prof !1 br label %for.inc for.inc: @@ -182,7 +186,7 @@ bb66: ; preds = %bb59 } ; Value %init is not predictable but it's okay since it is the value initial to the switch. -define i32 @initial.value.positive1(i32 %init) { +define i32 @initial.value.positive1(i32 %init) !prof !0 { ; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] ; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] ; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] @@ -241,3 +245,6 @@ infloop.i: exit: ret i32 0 } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index ad05684..092c854 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s ; These tests check that the DFA jump threading transformation is applied @@ -301,7 +301,7 @@ end: ret void } -define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) { +define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) !prof !0 { ; CHECK-LABEL: @pr106083_invalidBBarg_fold( ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] @@ -310,7 +310,7 @@ define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) { ; CHECK-NEXT: br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]] ; CHECK: BB2: ; CHECK-NEXT: store i16 0, ptr [[D:%.*]], align 2 -; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: spec.select.si.unfold.false: ; CHECK-NEXT: br label [[BB9]] ; CHECK: spec.select.si.unfold.false.jt0: @@ -357,7 +357,7 @@ BB1: ; preds = %BB1.backedge, %BB7, BB2: ; preds = %BB1 store i16 0, ptr %d, align 2 - %spec.select = select i1 %cmp2, i32 %sel, i32 0 + %spec.select = select i1 %cmp2, i32 %sel, i32 0, !prof !1 br label %BB7 BB7: ; preds = %BB2, %BB1 @@ -444,3 +444,10 @@ select.unfold: ; preds = %bb1, %.loopexit6 bb2: ; preds = %select.unfold unreachable } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} +;. diff --git a/llvm/test/Transforms/GVN/ptrtoaddr.ll b/llvm/test/Transforms/GVN/ptrtoaddr.ll new file mode 100644 index 0000000..6d02bc6 --- /dev/null +++ b/llvm/test/Transforms/GVN/ptrtoaddr.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=gvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll new file mode 100644 index 0000000..2b22965 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=instcombine %s -S | FileCheck %s + +declare void @clobber.i32(i32) + +define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %lower, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) { +; CHECK-LABEL: define <2 x i16> @selective_shift.v16( +; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16) +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer +; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]] +; CHECK-NEXT: ret <2 x i16> [[SEL_V]] +; + %upper.zext = zext <2 x i16> %upper to <2 x i32> + %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16) + %lower.zext = zext <2 x i16> %lower to <2 x i32> + %pack = or disjoint <2 x i32> %upper.shl, %lower.zext + %mask.bit = and <2 x i32> %mask, splat(i32 16) + %sel = lshr <2 x i32> %pack, %mask.bit + %trunc = trunc <2 x i32> %sel to <2 x i16> + ret <2 x i16> %trunc +} + +define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.wide( +; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 16 + %lower.zext = zext i16 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 16 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i16 + ret i16 %trunc +} + +; narrow zext type blocks fold +define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.narrow( +; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i24 + %upper.shl = shl i24 %upper.zext, 16 + %lower.zext = zext i16 %lower to i24 + %pack = or disjoint i24 %upper.shl, %lower.zext + %mask.bit = and i24 %mask, 16 + %sel = lshr i24 %pack, %mask.bit + %trunc = trunc i24 %sel to i16 + ret i16 %trunc +} + +; %lower's upper bits block fold +define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16_norange( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16 +; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.0( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]]) +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + call void @clobber.i32(i32 %upper.zext) + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + call void @clobber.i32(i32 %lower.zext) + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; multi-use of %pack blocks fold +define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.1( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + call void @clobber.i32(i32 %pack) + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; non-truncated use of %sel blocks fold +define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.2( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]]) +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + call void @clobber.i32(i32 %sel) + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; bitwidth must be a power of 2 to fold +define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) { +; CHECK-LABEL: define i24 @selective_shift_24( +; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24 +; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24 +; CHECK-NEXT: ret i24 [[TRUNC]] +; + %upper.zext = zext i24 %upper to i48 + %upper.shl = shl nuw i48 %upper.zext, 24 + %lower.zext = zext i24 %lower to i48 + %pack = or disjoint i48 %upper.shl, %lower.zext + %mask.bit = and i48 %mask, 24 + %sel = lshr i48 %pack, %mask.bit + %trunc = trunc i48 %sel to i24 + ret i24 %trunc +} + +define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} + +define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32.commute( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %lower.zext, %upper.shl + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll deleted file mode 100644 index 72bc543..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll +++ /dev/null @@ -1,164 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes='loop(loop-rotate)' -loop-rotate-multi=true | FileCheck %s - -; Test loop rotation with multiple exits, some of them - deoptimizing. -; We should end up with a latch which exit is non-deoptimizing, so we should rotate -; more than once. - -declare i32 @llvm.experimental.deoptimize.i32(...) - -define i32 @test_cond_with_one_deopt_exit(ptr nonnull %a, i64 %x) { -; Rotation done twice. -; Latch should be at the 2nd condition (for.cond2), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_one_deopt_exit( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAL_A_IDX3:%.*]] = load i32, ptr %a, align 4 -; CHECK-NEXT: [[ZERO_CHECK4:%.*]] = icmp eq i32 [[VAL_A_IDX3]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK4]], label %deopt.exit, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[FOR_CHECK8:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK8]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond2.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond1.deopt.exit_crit_edge, label %for.cond2 -; CHECK: for.cond2.return_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %deopt.exit -; CHECK: deopt.exit: -; CHECK: [[DEOPT_VAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 {{%.*}}) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds i32, ptr %a, i64 %idx - %val.a.idx = load i32, ptr %a.idx, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit, label %for.cond2 - -for.cond2: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit: - %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val -} - -define i32 @test_cond_with_two_deopt_exits(ptr nonnull %a, i64 %x) { -; Rotation done three times. -; Latch should be at the 3rd condition (for.cond3), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_two_deopt_exits( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_IDX_DEREF4:%.*]] = load ptr, ptr %a -; CHECK-NEXT: [[NULL_CHECK5:%.*]] = icmp eq ptr [[A_IDX_DEREF4]], null -; CHECK-NEXT: br i1 [[NULL_CHECK5]], label %deopt.exit1, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[VAL_A_IDX9:%.*]] = load i32, ptr [[A_IDX_DEREF4]], align 4 -; CHECK-NEXT: [[ZERO_CHECK10:%.*]] = icmp eq i32 [[VAL_A_IDX9]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK10]], label %deopt.exit2, label %for.cond3.lr.ph -; CHECK: for.cond3.lr.ph: -; CHECK-NEXT: [[FOR_CHECK14:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK14]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond2.deopt.exit2_crit_edge, label %for.cond3 -; CHECK: for.cond3: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond3.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[IDX_NEXT:%.*]] = add nuw nsw i64 {{%.*}}, 1 -; CHECK: [[NULL_CHECK:%.*]] = icmp eq ptr {{%.*}}, null -; CHECK-NEXT: br i1 [[NULL_CHECK]], label %for.cond1.deopt.exit1_crit_edge, label %for.cond2 -; CHECK: for.cond3.return_crit_edge: -; CHECK-NEXT: [[SPLIT18:%.*]] = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit1_crit_edge: -; CHECK-NEXT: br label %deopt.exit1 -; CHECK: deopt.exit1: -; CHECK-NEXT: [[DEOPT_VAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL1]] -; CHECK: for.cond2.deopt.exit2_crit_edge: -; CHECK-NEXT: [[SPLIT:%.*]] = phi i32 -; CHECK-NEXT: br label %deopt.exit2 -; CHECK: deopt.exit2: -; CHECK-NEXT: [[VAL_A_IDX_LCSSA:%.*]] = phi i32 -; CHECK-NEXT: [[DEOPT_VAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[VAL_A_IDX_LCSSA]]) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL2]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds ptr, ptr %a, i64 %idx - %a.idx.deref = load ptr, ptr %a.idx - %null.check = icmp eq ptr %a.idx.deref, null - br i1 %null.check, label %deopt.exit1, label %for.cond2 - -for.cond2: - %val.a.idx = load i32, ptr %a.idx.deref, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit2, label %for.cond3 - -for.cond3: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit1: - %deopt.val1 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] - ret i32 %deopt.val1 -deopt.exit2: - %deopt.val2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val2 -} diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll deleted file mode 100644 index 748700c..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll +++ /dev/null @@ -1,236 +0,0 @@ -; RUN: opt -S -passes=loop-rotate < %s -verify-loop-info -verify-dom-info -verify-memoryssa | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -; PR7447 -define i32 @test1(ptr nocapture %a) nounwind readonly { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond1, %entry - %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ] - %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ] - br i1 %i.0, label %for.cond1, label %return - -for.cond1: ; preds = %for.cond, %land.rhs - %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ] - %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ] - %cmp2 = icmp ult i32 %i.1, 100 - br i1 %cmp2, label %land.rhs, label %for.cond - -land.rhs: ; preds = %for.cond1 - %conv = zext i32 %i.1 to i64 - %arrayidx = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 %conv - %0 = load i32, ptr %arrayidx, align 4 - %add = add i32 %0, %sum.1 - %cmp4 = icmp ugt i32 %add, 1000 - %inc = add i32 %i.1, 1 - br i1 %cmp4, label %return, label %for.cond1 - -return: ; preds = %for.cond, %land.rhs - %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ] - ret i32 %retval.0 - -; CHECK-LABEL: @test1( -; CHECK: for.cond1.preheader: -; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ] -; CHECK: br label %for.cond1 - -; CHECK: for.cond1: -; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ] -; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ] -; CHECK: %cmp2 = icmp ult i32 %i.1, 100 -; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit -} - -define void @test2(i32 %x) nounwind { -entry: - br label %for.cond - -for.cond: ; preds = %if.end, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] - %cmp = icmp eq i32 %i.0, %x - br i1 %cmp, label %return.loopexit, label %for.body - -for.body: ; preds = %for.cond - %call = tail call i32 @foo(i32 %i.0) nounwind - %tobool = icmp eq i32 %call, 0 - br i1 %tobool, label %if.end, label %a - -if.end: ; preds = %for.body - %call1 = tail call i32 @foo(i32 42) nounwind - %inc = add i32 %i.0, 1 - br label %for.cond - -a: ; preds = %for.body - %call2 = tail call i32 @bar(i32 1) nounwind - br label %return - -return.loopexit: ; preds = %for.cond - br label %return - -return: ; preds = %return.loopexit, %a - ret void - -; CHECK-LABEL: @test2( -; CHECK: if.end: -; CHECK: %inc = add i32 %i.02, 1 -; CHECK: %cmp = icmp eq i32 %inc, %x -; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body -} - -declare i32 @foo(i32) - -declare i32 @bar(i32) - -@_ZTIi = external constant ptr - -; Verify dominators. -define void @test3(i32 %x) personality ptr @__gxx_personality_v0 { -entry: - %cmp2 = icmp eq i32 0, %x - br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.lr.ph, %for.inc - %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - invoke void @_Z3fooi(i32 %i.03) - to label %for.inc unwind label %lpad - -for.inc: ; preds = %for.body - %inc = add i32 %i.03, 1 - %cmp = icmp eq i32 %inc, %x - br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body - -lpad: ; preds = %for.body - %0 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %1 = extractvalue { ptr, i32 } %0, 0 - %2 = extractvalue { ptr, i32 } %0, 1 - %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind - %matches = icmp eq i32 %2, %3 - br i1 %matches, label %catch, label %eh.resume - -catch: ; preds = %lpad - %4 = tail call ptr @__cxa_begin_catch(ptr %1) nounwind - br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph - -for.body.i.lr.ph: ; preds = %catch - br label %for.body.i - -for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i - %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ] - invoke void @_Z3fooi(i32 %i.0.i1) - to label %for.inc.i unwind label %lpad.i - -for.inc.i: ; preds = %for.body.i - %inc.i = add i32 %i.0.i1, 1 - %cmp.i = icmp eq i32 %inc.i, 0 - br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i - -lpad.i: ; preds = %for.body.i - %5 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %6 = extractvalue { ptr, i32 } %5, 0 - %7 = extractvalue { ptr, i32 } %5, 1 - %matches.i = icmp eq i32 %7, %3 - br i1 %matches.i, label %catch.i, label %lpad1.body - -catch.i: ; preds = %lpad.i - %8 = tail call ptr @__cxa_begin_catch(ptr %6) nounwind - invoke void @test3(i32 0) - to label %invoke.cont2.i unwind label %lpad1.i - -invoke.cont2.i: ; preds = %catch.i - tail call void @__cxa_end_catch() nounwind - br label %invoke.cont2 - -lpad1.i: ; preds = %catch.i - %9 = landingpad { ptr, i32 } - cleanup - %10 = extractvalue { ptr, i32 } %9, 0 - %11 = extractvalue { ptr, i32 } %9, 1 - tail call void @__cxa_end_catch() nounwind - br label %lpad1.body - -for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i - br label %invoke.cont2.loopexit - -invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch - br label %invoke.cont2 - -invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i - tail call void @__cxa_end_catch() nounwind - br label %try.cont - -for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc - br label %try.cont.loopexit - -try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry - br label %try.cont - -try.cont: ; preds = %try.cont.loopexit, %invoke.cont2 - ret void - -lpad1.body: ; preds = %lpad1.i, %lpad.i - %exn.slot.0.i = phi ptr [ %10, %lpad1.i ], [ %6, %lpad.i ] - %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ] - tail call void @__cxa_end_catch() nounwind - br label %eh.resume - -eh.resume: ; preds = %lpad1.body, %lpad - %exn.slot.0 = phi ptr [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ] - %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ] - %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn.slot.0, 0 - %lpad.val5 = insertvalue { ptr, i32 } %lpad.val, i32 %ehselector.slot.0, 1 - resume { ptr, i32 } %lpad.val5 -} - -declare void @_Z3fooi(i32) - -declare i32 @__gxx_personality_v0(...) - -declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone - -declare ptr @__cxa_begin_catch(ptr) - -declare void @__cxa_end_catch() - -define void @test4(i1 %arg) nounwind uwtable { -entry: - br label %"7" - -"3": ; preds = %"7" - br i1 %arg, label %"31", label %"4" - -"4": ; preds = %"3" - %. = select i1 undef, float 0x3F50624DE0000000, float undef - %0 = add i32 %1, 1 - br label %"7" - -"7": ; preds = %"4", %entry - %1 = phi i32 [ %0, %"4" ], [ 0, %entry ] - %2 = icmp slt i32 %1, 100 - br i1 %2, label %"3", label %"8" - -"8": ; preds = %"7" - br i1 %arg, label %"9", label %"31" - -"9": ; preds = %"8" - br label %"33" - -"27": ; preds = %"31" - unreachable - -"31": ; preds = %"8", %"3" - br i1 %arg, label %"27", label %"32" - -"32": ; preds = %"31" - br label %"33" - -"33": ; preds = %"32", %"9" - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index 649e34e..7548bf6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -45,9 +45,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 8) ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 4, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY2:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 8, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 12, i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY1:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ] @@ -67,17 +64,11 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-UF4-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4 -; CHECK-UF4-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8 -; CHECK-UF4-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12) ; CHECK-UF4-NEXT: [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8) ; CHECK-UF4-NEXT: [[TMP10]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT7:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP13]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT8:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP14]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true ; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index 5ee4e9e..75acbea9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -46,23 +46,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48) ; CHECK-UF4-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32) ; CHECK-UF4-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16) ; CHECK-UF4-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -103,23 +91,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] -; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4 -; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] -; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5 -; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]] -; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48 -; CHECK-UF4-NEXT: [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48) ; CHECK-UF4-NEXT: [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32) ; CHECK-UF4-NEXT: [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16) ; CHECK-UF4-NEXT: [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0 ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -191,23 +167,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0 -; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] -; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] -; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6) ; CHECK-UF4-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2) ; CHECK-UF4-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -248,23 +212,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] -; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1 -; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] -; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2 -; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6 -; CHECK-UF4-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6) ; CHECK-UF4-NEXT: [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2) ; CHECK-UF4-NEXT: [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0 ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true ; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index f5329cf..c225ede5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -580,6 +580,201 @@ exit: ret double %accum } +define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %src, ptr noalias %src.2, ptr noalias %dst) #0 { +; I64-LABEL: define void @loaded_address_used_by_load_through_blend( +; I64-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I64-NEXT: [[ENTRY:.*]]: +; I64-NEXT: br label %[[LOOP_HEADER:.*]] +; I64: [[LOOP_HEADER]]: +; I64-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; I64-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_LATCH]] ] +; I64-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1 +; I64-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]] +; I64-NEXT: [[L_SRC:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; I64-NEXT: [[C:%.*]] = fcmp oeq float [[L_SRC]], 0.000000e+00 +; I64-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; I64: [[THEN]]: +; I64-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV_1]], [[START]] +; I64-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[IV_MUL]] +; I64-NEXT: br label %[[LOOP_LATCH]] +; I64: [[LOOP_LATCH]]: +; I64-NEXT: [[MERGE_GEP:%.*]] = phi ptr [ [[GEP_SRC_2]], %[[THEN]] ], [ [[SRC_2]], %[[LOOP_HEADER]] ] +; I64-NEXT: [[L_2:%.*]] = load float, ptr [[MERGE_GEP]], align 4 +; I64-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; I64-NEXT: store float [[L_2]], ptr [[GEP_DST]], align 4 +; I64-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; I64-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 +; I64-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 100 +; I64-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] +; I64: [[EXIT]]: +; I64-NEXT: ret void +; +; I32-LABEL: define void @loaded_address_used_by_load_through_blend( +; I32-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I32-NEXT: [[ENTRY:.*:]] +; I32-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1 +; I32-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 100) +; I32-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]] +; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8 +; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; I32: [[VECTOR_PH]]: +; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 +; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]] +; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; I32-NEXT: br label %[[VECTOR_BODY:.*]] +; I32: [[VECTOR_BODY]]: +; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 +; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 +; I32-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 4 +; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 +; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 +; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7 +; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1 +; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1 +; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1 +; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1 +; I32-NEXT: [[TMP15:%.*]] = add i64 [[TMP7]], 1 +; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1 +; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1 +; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1 +; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0 +; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1 +; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2 +; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3 +; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4 +; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5 +; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6 +; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7 +; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]] +; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] +; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; I32-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]] +; I32-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]] +; I32-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]] +; I32-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]] +; I32-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] +; I32-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP27]], align 4 +; I32-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4 +; I32-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4 +; I32-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4 +; I32-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4 +; I32-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4 +; I32-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4 +; I32-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4 +; I32-NEXT: [[TMP43:%.*]] = insertelement <8 x float> poison, float [[TMP35]], i32 0 +; I32-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP36]], i32 1 +; I32-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP37]], i32 2 +; I32-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP38]], i32 3 +; I32-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP39]], i32 4 +; I32-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP40]], i32 5 +; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6 +; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7 +; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer +; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]] +; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0 +; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]] +; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1 +; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]] +; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2 +; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]] +; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3 +; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]] +; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4 +; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]] +; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5 +; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]] +; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6 +; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]] +; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7 +; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]] +; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0 +; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1 +; I32-NEXT: [[TMP71:%.*]] = insertelement <8 x ptr> [[TMP70]], ptr [[TMP58]], i32 2 +; I32-NEXT: [[TMP72:%.*]] = insertelement <8 x ptr> [[TMP71]], ptr [[TMP60]], i32 3 +; I32-NEXT: [[TMP73:%.*]] = insertelement <8 x ptr> [[TMP72]], ptr [[TMP62]], i32 4 +; I32-NEXT: [[TMP74:%.*]] = insertelement <8 x ptr> [[TMP73]], ptr [[TMP64]], i32 5 +; I32-NEXT: [[TMP75:%.*]] = insertelement <8 x ptr> [[TMP74]], ptr [[TMP66]], i32 6 +; I32-NEXT: [[TMP76:%.*]] = insertelement <8 x ptr> [[TMP75]], ptr [[TMP68]], i32 7 +; I32-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP51]], <8 x ptr> [[TMP76]], <8 x ptr> [[BROADCAST_SPLAT2]] +; I32-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 0 +; I32-NEXT: [[TMP78:%.*]] = load float, ptr [[TMP77]], align 4 +; I32-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 1 +; I32-NEXT: [[TMP80:%.*]] = load float, ptr [[TMP79]], align 4 +; I32-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 2 +; I32-NEXT: [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4 +; I32-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 3 +; I32-NEXT: [[TMP84:%.*]] = load float, ptr [[TMP83]], align 4 +; I32-NEXT: [[TMP85:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 4 +; I32-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP85]], align 4 +; I32-NEXT: [[TMP87:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 5 +; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 +; I32-NEXT: [[TMP89:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 6 +; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4 +; I32-NEXT: [[TMP91:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 7 +; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4 +; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; I32-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; I32-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; I32-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; I32-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; I32-NEXT: store float [[TMP78]], ptr [[TMP93]], align 4 +; I32-NEXT: store float [[TMP80]], ptr [[TMP94]], align 4 +; I32-NEXT: store float [[TMP82]], ptr [[TMP95]], align 4 +; I32-NEXT: store float [[TMP84]], ptr [[TMP96]], align 4 +; I32-NEXT: store float [[TMP86]], ptr [[TMP97]], align 4 +; I32-NEXT: store float [[TMP88]], ptr [[TMP98]], align 4 +; I32-NEXT: store float [[TMP90]], ptr [[TMP99]], align 4 +; I32-NEXT: store float [[TMP92]], ptr [[TMP100]], align 4 +; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; I32-NEXT: [[TMP101:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; I32-NEXT: br i1 [[TMP101]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; I32: [[MIDDLE_BLOCK]]: +; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; I32-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; I32: [[SCALAR_PH]]: +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.2 = phi i64 [ %start, %entry ], [ %iv.2.next, %loop.latch ] + %iv.1 = add i64 %iv, 1 + %gep.src = getelementptr i8, ptr %src, i64 %iv.1 + %l.src = load float, ptr %gep.src, align 4 + %c = fcmp oeq float %l.src, 0.000000e+00 + br i1 %c, label %then, label %loop.latch + +then: + %iv.mul = mul i64 %iv.1, %start + %gep.src.2 = getelementptr i8, ptr %src.2, i64 %iv.mul + br label %loop.latch + +loop.latch: + %merge.gep = phi ptr [ %gep.src.2, %then ], [ %src.2, %loop.header ] + %l.2 = load float, ptr %merge.gep, align 4 + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store float %l.2, ptr %gep.dst, align 4 + %iv.next = add i64 %iv, 1 + %iv.2.next = add i64 %iv.2, -1 + %ec = icmp sgt i64 %iv.2, 100 + br i1 %ec, label %loop.header, label %exit + +exit: + ret void +} + +attributes #0 = { "target-cpu"="znver3" } attributes #0 = { "target-cpu"="znver2" } !0 = distinct !{!0, !1} diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll new file mode 100644 index 0000000..b020e59 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s + +@end = external global [128 x i8] + +; Test case for https://github.com/llvm/llvm-project/issues/162128. +define void @test_epilogue_step_scev_expansion(ptr %dst) { +; CHECK-LABEL: define void @test_epilogue_step_scev_expansion( +; CHECK-SAME: ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4 +; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF1]] +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX3]] +; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT4]], [[N_VEC2]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC2]] +; CHECK-NEXT: br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i8 0, ptr [[GEP_DST]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64)) +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 0, ptr %gep.dst, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64)) + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll index af57967..b63ab8f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll @@ -22,7 +22,6 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/NewGVN/ptrtoaddr.ll b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll new file mode 100644 index 0000000..e51b42a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=newgvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll new file mode 100644 index 0000000..7c9888f --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll @@ -0,0 +1,448 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -O3 < %s | FileCheck %s + +target datalayout = "n64:32:16:8" + +define i8 @test(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[X_:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 52) +; CHECK-NEXT: ret i8 [[X_]] +; +start: + %_0 = alloca [1 x i8], align 1 + %0 = icmp eq i8 %x, 0 + br i1 %0, label %bb1, label %bb2 + +bb1: ; preds = %start + store i8 0, ptr %_0, align 1 + br label %bb105 + +bb2: ; preds = %start + %1 = icmp eq i8 %x, 1 + br i1 %1, label %bb3, label %bb4 + +bb105: ; preds = %bb104, %bb103, %bb101, %bb99, %bb97, %bb95, %bb93, %bb91, %bb89, %bb87, %bb85, %bb83, %bb81, %bb79, %bb77, %bb75, %bb73, %bb71, %bb69, %bb67, %bb65, %bb63, %bb61, %bb59, %bb57, %bb55, %bb53, %bb51, %bb49, %bb47, %bb45, %bb43, %bb41, %bb39, %bb37, %bb35, %bb33, %bb31, %bb29, %bb27, %bb25, %bb23, %bb21, %bb19, %bb17, %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1 + %2 = load i8, ptr %_0, align 1 + ret i8 %2 + +bb3: ; preds = %bb2 + store i8 1, ptr %_0, align 1 + br label %bb105 + +bb4: ; preds = %bb2 + %3 = icmp eq i8 %x, 2 + br i1 %3, label %bb5, label %bb6 + +bb5: ; preds = %bb4 + store i8 2, ptr %_0, align 1 + br label %bb105 + +bb6: ; preds = %bb4 + %4 = icmp eq i8 %x, 3 + br i1 %4, label %bb7, label %bb8 + +bb7: ; preds = %bb6 + store i8 3, ptr %_0, align 1 + br label %bb105 + +bb8: ; preds = %bb6 + %5 = icmp eq i8 %x, 4 + br i1 %5, label %bb9, label %bb10 + +bb9: ; preds = %bb8 + store i8 4, ptr %_0, align 1 + br label %bb105 + +bb10: ; preds = %bb8 + %6 = icmp eq i8 %x, 5 + br i1 %6, label %bb11, label %bb12 + +bb11: ; preds = %bb10 + store i8 5, ptr %_0, align 1 + br label %bb105 + +bb12: ; preds = %bb10 + %7 = icmp eq i8 %x, 6 + br i1 %7, label %bb13, label %bb14 + +bb13: ; preds = %bb12 + store i8 6, ptr %_0, align 1 + br label %bb105 + +bb14: ; preds = %bb12 + %8 = icmp eq i8 %x, 7 + br i1 %8, label %bb15, label %bb16 + +bb15: ; preds = %bb14 + store i8 7, ptr %_0, align 1 + br label %bb105 + +bb16: ; preds = %bb14 + %9 = icmp eq i8 %x, 8 + br i1 %9, label %bb17, label %bb18 + +bb17: ; preds = %bb16 + store i8 8, ptr %_0, align 1 + br label %bb105 + +bb18: ; preds = %bb16 + %10 = icmp eq i8 %x, 9 + br i1 %10, label %bb19, label %bb20 + +bb19: ; preds = %bb18 + store i8 9, ptr %_0, align 1 + br label %bb105 + +bb20: ; preds = %bb18 + %11 = icmp eq i8 %x, 10 + br i1 %11, label %bb21, label %bb22 + +bb21: ; preds = %bb20 + store i8 10, ptr %_0, align 1 + br label %bb105 + +bb22: ; preds = %bb20 + %12 = icmp eq i8 %x, 11 + br i1 %12, label %bb23, label %bb24 + +bb23: ; preds = %bb22 + store i8 11, ptr %_0, align 1 + br label %bb105 + +bb24: ; preds = %bb22 + %13 = icmp eq i8 %x, 12 + br i1 %13, label %bb25, label %bb26 + +bb25: ; preds = %bb24 + store i8 12, ptr %_0, align 1 + br label %bb105 + +bb26: ; preds = %bb24 + %14 = icmp eq i8 %x, 13 + br i1 %14, label %bb27, label %bb28 + +bb27: ; preds = %bb26 + store i8 13, ptr %_0, align 1 + br label %bb105 + +bb28: ; preds = %bb26 + %15 = icmp eq i8 %x, 14 + br i1 %15, label %bb29, label %bb30 + +bb29: ; preds = %bb28 + store i8 14, ptr %_0, align 1 + br label %bb105 + +bb30: ; preds = %bb28 + %16 = icmp eq i8 %x, 15 + br i1 %16, label %bb31, label %bb32 + +bb31: ; preds = %bb30 + store i8 15, ptr %_0, align 1 + br label %bb105 + +bb32: ; preds = %bb30 + %17 = icmp eq i8 %x, 16 + br i1 %17, label %bb33, label %bb34 + +bb33: ; preds = %bb32 + store i8 16, ptr %_0, align 1 + br label %bb105 + +bb34: ; preds = %bb32 + %18 = icmp eq i8 %x, 17 + br i1 %18, label %bb35, label %bb36 + +bb35: ; preds = %bb34 + store i8 17, ptr %_0, align 1 + br label %bb105 + +bb36: ; preds = %bb34 + %19 = icmp eq i8 %x, 18 + br i1 %19, label %bb37, label %bb38 + +bb37: ; preds = %bb36 + store i8 18, ptr %_0, align 1 + br label %bb105 + +bb38: ; preds = %bb36 + %20 = icmp eq i8 %x, 19 + br i1 %20, label %bb39, label %bb40 + +bb39: ; preds = %bb38 + store i8 19, ptr %_0, align 1 + br label %bb105 + +bb40: ; preds = %bb38 + %21 = icmp eq i8 %x, 20 + br i1 %21, label %bb41, label %bb42 + +bb41: ; preds = %bb40 + store i8 20, ptr %_0, align 1 + br label %bb105 + +bb42: ; preds = %bb40 + %22 = icmp eq i8 %x, 21 + br i1 %22, label %bb43, label %bb44 + +bb43: ; preds = %bb42 + store i8 21, ptr %_0, align 1 + br label %bb105 + +bb44: ; preds = %bb42 + %23 = icmp eq i8 %x, 22 + br i1 %23, label %bb45, label %bb46 + +bb45: ; preds = %bb44 + store i8 22, ptr %_0, align 1 + br label %bb105 + +bb46: ; preds = %bb44 + %24 = icmp eq i8 %x, 23 + br i1 %24, label %bb47, label %bb48 + +bb47: ; preds = %bb46 + store i8 23, ptr %_0, align 1 + br label %bb105 + +bb48: ; preds = %bb46 + %25 = icmp eq i8 %x, 24 + br i1 %25, label %bb49, label %bb50 + +bb49: ; preds = %bb48 + store i8 24, ptr %_0, align 1 + br label %bb105 + +bb50: ; preds = %bb48 + %26 = icmp eq i8 %x, 25 + br i1 %26, label %bb51, label %bb52 + +bb51: ; preds = %bb50 + store i8 25, ptr %_0, align 1 + br label %bb105 + +bb52: ; preds = %bb50 + %27 = icmp eq i8 %x, 26 + br i1 %27, label %bb53, label %bb54 + +bb53: ; preds = %bb52 + store i8 26, ptr %_0, align 1 + br label %bb105 + +bb54: ; preds = %bb52 + %28 = icmp eq i8 %x, 27 + br i1 %28, label %bb55, label %bb56 + +bb55: ; preds = %bb54 + store i8 27, ptr %_0, align 1 + br label %bb105 + +bb56: ; preds = %bb54 + %29 = icmp eq i8 %x, 28 + br i1 %29, label %bb57, label %bb58 + +bb57: ; preds = %bb56 + store i8 28, ptr %_0, align 1 + br label %bb105 + +bb58: ; preds = %bb56 + %30 = icmp eq i8 %x, 29 + br i1 %30, label %bb59, label %bb60 + +bb59: ; preds = %bb58 + store i8 29, ptr %_0, align 1 + br label %bb105 + +bb60: ; preds = %bb58 + %31 = icmp eq i8 %x, 30 + br i1 %31, label %bb61, label %bb62 + +bb61: ; preds = %bb60 + store i8 30, ptr %_0, align 1 + br label %bb105 + +bb62: ; preds = %bb60 + %32 = icmp eq i8 %x, 31 + br i1 %32, label %bb63, label %bb64 + +bb63: ; preds = %bb62 + store i8 31, ptr %_0, align 1 + br label %bb105 + +bb64: ; preds = %bb62 + %33 = icmp eq i8 %x, 32 + br i1 %33, label %bb65, label %bb66 + +bb65: ; preds = %bb64 + store i8 32, ptr %_0, align 1 + br label %bb105 + +bb66: ; preds = %bb64 + %34 = icmp eq i8 %x, 33 + br i1 %34, label %bb67, label %bb68 + +bb67: ; preds = %bb66 + store i8 33, ptr %_0, align 1 + br label %bb105 + +bb68: ; preds = %bb66 + %35 = icmp eq i8 %x, 34 + br i1 %35, label %bb69, label %bb70 + +bb69: ; preds = %bb68 + store i8 34, ptr %_0, align 1 + br label %bb105 + +bb70: ; preds = %bb68 + %36 = icmp eq i8 %x, 35 + br i1 %36, label %bb71, label %bb72 + +bb71: ; preds = %bb70 + store i8 35, ptr %_0, align 1 + br label %bb105 + +bb72: ; preds = %bb70 + %37 = icmp eq i8 %x, 36 + br i1 %37, label %bb73, label %bb74 + +bb73: ; preds = %bb72 + store i8 36, ptr %_0, align 1 + br label %bb105 + +bb74: ; preds = %bb72 + %38 = icmp eq i8 %x, 37 + br i1 %38, label %bb75, label %bb76 + +bb75: ; preds = %bb74 + store i8 37, ptr %_0, align 1 + br label %bb105 + +bb76: ; preds = %bb74 + %39 = icmp eq i8 %x, 38 + br i1 %39, label %bb77, label %bb78 + +bb77: ; preds = %bb76 + store i8 38, ptr %_0, align 1 + br label %bb105 + +bb78: ; preds = %bb76 + %40 = icmp eq i8 %x, 39 + br i1 %40, label %bb79, label %bb80 + +bb79: ; preds = %bb78 + store i8 39, ptr %_0, align 1 + br label %bb105 + +bb80: ; preds = %bb78 + %41 = icmp eq i8 %x, 40 + br i1 %41, label %bb81, label %bb82 + +bb81: ; preds = %bb80 + store i8 40, ptr %_0, align 1 + br label %bb105 + +bb82: ; preds = %bb80 + %42 = icmp eq i8 %x, 41 + br i1 %42, label %bb83, label %bb84 + +bb83: ; preds = %bb82 + store i8 41, ptr %_0, align 1 + br label %bb105 + +bb84: ; preds = %bb82 + %43 = icmp eq i8 %x, 42 + br i1 %43, label %bb85, label %bb86 + +bb85: ; preds = %bb84 + store i8 42, ptr %_0, align 1 + br label %bb105 + +bb86: ; preds = %bb84 + %44 = icmp eq i8 %x, 43 + br i1 %44, label %bb87, label %bb88 + +bb87: ; preds = %bb86 + store i8 43, ptr %_0, align 1 + br label %bb105 + +bb88: ; preds = %bb86 + %45 = icmp eq i8 %x, 44 + br i1 %45, label %bb89, label %bb90 + +bb89: ; preds = %bb88 + store i8 44, ptr %_0, align 1 + br label %bb105 + +bb90: ; preds = %bb88 + %46 = icmp eq i8 %x, 45 + br i1 %46, label %bb91, label %bb92 + +bb91: ; preds = %bb90 + store i8 45, ptr %_0, align 1 + br label %bb105 + +bb92: ; preds = %bb90 + %47 = icmp eq i8 %x, 46 + br i1 %47, label %bb93, label %bb94 + +bb93: ; preds = %bb92 + store i8 46, ptr %_0, align 1 + br label %bb105 + +bb94: ; preds = %bb92 + %48 = icmp eq i8 %x, 47 + br i1 %48, label %bb95, label %bb96 + +bb95: ; preds = %bb94 + store i8 47, ptr %_0, align 1 + br label %bb105 + +bb96: ; preds = %bb94 + %49 = icmp eq i8 %x, 48 + br i1 %49, label %bb97, label %bb98 + +bb97: ; preds = %bb96 + store i8 48, ptr %_0, align 1 + br label %bb105 + +bb98: ; preds = %bb96 + %50 = icmp eq i8 %x, 49 + br i1 %50, label %bb99, label %bb100 + +bb99: ; preds = %bb98 + store i8 49, ptr %_0, align 1 + br label %bb105 + +bb100: ; preds = %bb98 + %51 = icmp eq i8 %x, 50 + br i1 %51, label %bb101, label %bb102 + +bb101: ; preds = %bb100 + store i8 50, ptr %_0, align 1 + br label %bb105 + +bb102: ; preds = %bb100 + %52 = icmp eq i8 %x, 51 + br i1 %52, label %bb103, label %bb104 + +bb103: ; preds = %bb102 + store i8 51, ptr %_0, align 1 + br label %bb105 + +bb104: ; preds = %bb102 + store i8 52, ptr %_0, align 1 + br label %bb105 +} + +; Make sure the call is inlined. +define i8 @test2(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test2( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call range(i8 0, 53) i8 @llvm.umin.i8(i8 [[X]], i8 52) +; CHECK-NEXT: ret i8 [[CALL]] +; + %call = call i8 @test(i8 %x) + ret i8 %call +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index cf62fd5..a8880274 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -4,21 +4,14 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_sdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2 -; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4 ; CHECK-NEXT: ret void @@ -58,21 +51,14 @@ entry: define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_udiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2 -; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll index 9bbe3eb..42d3dcc 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll @@ -97,8 +97,8 @@ if.end: ret ptr %x.0 } -!0 = !{!"int"} -!1 = !{!"char[4]"} +!0 = !{!"int", i1 0} +!1 = !{!"char[4]", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll index c9063d3..25267dc 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT +; RUN: opt -S -passes='simplifycfg<switch-to-arithmetic>' < %s | FileCheck %s --check-prefix=OPTNOLUT ; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg<switch-to-lookup>' < %s | FileCheck %s --check-prefix=TTINOLUT %} ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @linear_transform_with_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_with_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[END:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[CASE0:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE0]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 4 +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 +; OPTNOLUT-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_with_default( @@ -138,26 +126,8 @@ end: define i32 @linear_transform_no_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_no_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: i32 4, label %[[CASE4:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE4]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: unreachable -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 ; OPTNOLUT-NEXT: ret i32 [[SWITCH_IDX_MULT]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_no_default( @@ -350,18 +320,9 @@ end: define i32 @single_value_withdefault(i32 %x) { ; OPTNOLUT-LABEL: define i32 @single_value_withdefault( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[DOT]] ; ; TTINOLUT-LABEL: define i32 @single_value_withdefault( @@ -401,18 +362,9 @@ end: define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" { ; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables( ; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables( @@ -449,6 +401,60 @@ end: ret i32 %idx } +define i1 @single_value_with_mask(i32 %x) { +; OPTNOLUT-LABEL: define i1 @single_value_with_mask( +; OPTNOLUT-SAME: i32 [[X:%.*]]) { +; OPTNOLUT-NEXT: [[ENTRY:.*]]: +; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ +; OPTNOLUT-NEXT: i32 18, label %[[END:.*]] +; OPTNOLUT-NEXT: i32 21, label %[[END]] +; OPTNOLUT-NEXT: i32 48, label %[[END]] +; OPTNOLUT-NEXT: i32 16, label %[[END]] +; OPTNOLUT-NEXT: ] +; OPTNOLUT: [[DEFAULT]]: +; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; OPTNOLUT-NEXT: br label %[[END]] +; OPTNOLUT: [[END]]: +; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ] +; OPTNOLUT-NEXT: ret i1 [[RES]] +; +; TTINOLUT-LABEL: define i1 @single_value_with_mask( +; TTINOLUT-SAME: i32 [[X:%.*]]) { +; TTINOLUT-NEXT: [[ENTRY:.*]]: +; TTINOLUT-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16 +; TTINOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33 +; TTINOLUT-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64 +; TTINOLUT-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]] +; TTINOLUT-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1 +; TTINOLUT-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; TTINOLUT-NEXT: br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]] +; TTINOLUT: [[DEFAULT]]: +; TTINOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; TTINOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; TTINOLUT-NEXT: br label %[[END]] +; TTINOLUT: [[END]]: +; TTINOLUT-NEXT: [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ] +; TTINOLUT-NEXT: ret i1 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] + +default: + %cmp = icmp eq i32 %x, 80 + %sel = select i1 %cmp, i1 false, i1 true + br label %end + +end: + %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ] + ret i1 %res +} + define i32 @lookup_table(i32 %x) { ; OPTNOLUT-LABEL: define i32 @lookup_table( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { |