diff options
Diffstat (limited to 'llvm/test/Transforms')
9 files changed, 650 insertions, 466 deletions
diff --git a/llvm/test/Transforms/GVN/ptrtoaddr.ll b/llvm/test/Transforms/GVN/ptrtoaddr.ll new file mode 100644 index 0000000..6d02bc6 --- /dev/null +++ b/llvm/test/Transforms/GVN/ptrtoaddr.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=gvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll new file mode 100644 index 0000000..2b22965 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=instcombine %s -S | FileCheck %s + +declare void @clobber.i32(i32) + +define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %lower, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) { +; CHECK-LABEL: define <2 x i16> @selective_shift.v16( +; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16) +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer +; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]] +; CHECK-NEXT: ret <2 x i16> [[SEL_V]] +; + %upper.zext = zext <2 x i16> %upper to <2 x i32> + %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16) + %lower.zext = zext <2 x i16> %lower to <2 x i32> + %pack = or disjoint <2 x i32> %upper.shl, %lower.zext + %mask.bit = and <2 x i32> %mask, splat(i32 16) + %sel = lshr <2 x i32> %pack, %mask.bit + %trunc = trunc <2 x i32> %sel to <2 x i16> + ret <2 x i16> %trunc +} + +define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.wide( +; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 16 + %lower.zext = zext i16 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 16 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i16 + ret i16 %trunc +} + +; narrow zext type blocks fold +define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.narrow( +; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i24 + %upper.shl = shl i24 %upper.zext, 16 + %lower.zext = zext i16 %lower to i24 + %pack = or disjoint i24 %upper.shl, %lower.zext + %mask.bit = and i24 %mask, 16 + %sel = lshr i24 %pack, %mask.bit + %trunc = trunc i24 %sel to i16 + ret i16 %trunc +} + +; %lower's upper bits block fold +define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16_norange( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16 +; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.0( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]]) +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + call void @clobber.i32(i32 %upper.zext) + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + call void @clobber.i32(i32 %lower.zext) + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; multi-use of %pack blocks fold +define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.1( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + call void @clobber.i32(i32 %pack) + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; non-truncated use of %sel blocks fold +define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.2( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]]) +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + call void @clobber.i32(i32 %sel) + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; bitwidth must be a power of 2 to fold +define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) { +; CHECK-LABEL: define i24 @selective_shift_24( +; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24 +; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24 +; CHECK-NEXT: ret i24 [[TRUNC]] +; + %upper.zext = zext i24 %upper to i48 + %upper.shl = shl nuw i48 %upper.zext, 24 + %lower.zext = zext i24 %lower to i48 + %pack = or disjoint i48 %upper.shl, %lower.zext + %mask.bit = and i48 %mask, 24 + %sel = lshr i48 %pack, %mask.bit + %trunc = trunc i48 %sel to i24 + ret i24 %trunc +} + +define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} + +define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32.commute( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %lower.zext, %upper.shl + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll deleted file mode 100644 index 72bc543..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll +++ /dev/null @@ -1,164 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes='loop(loop-rotate)' -loop-rotate-multi=true | FileCheck %s - -; Test loop rotation with multiple exits, some of them - deoptimizing. -; We should end up with a latch which exit is non-deoptimizing, so we should rotate -; more than once. - -declare i32 @llvm.experimental.deoptimize.i32(...) - -define i32 @test_cond_with_one_deopt_exit(ptr nonnull %a, i64 %x) { -; Rotation done twice. -; Latch should be at the 2nd condition (for.cond2), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_one_deopt_exit( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAL_A_IDX3:%.*]] = load i32, ptr %a, align 4 -; CHECK-NEXT: [[ZERO_CHECK4:%.*]] = icmp eq i32 [[VAL_A_IDX3]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK4]], label %deopt.exit, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[FOR_CHECK8:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK8]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond2.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond1.deopt.exit_crit_edge, label %for.cond2 -; CHECK: for.cond2.return_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %deopt.exit -; CHECK: deopt.exit: -; CHECK: [[DEOPT_VAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 {{%.*}}) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds i32, ptr %a, i64 %idx - %val.a.idx = load i32, ptr %a.idx, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit, label %for.cond2 - -for.cond2: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit: - %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val -} - -define i32 @test_cond_with_two_deopt_exits(ptr nonnull %a, i64 %x) { -; Rotation done three times. -; Latch should be at the 3rd condition (for.cond3), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_two_deopt_exits( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_IDX_DEREF4:%.*]] = load ptr, ptr %a -; CHECK-NEXT: [[NULL_CHECK5:%.*]] = icmp eq ptr [[A_IDX_DEREF4]], null -; CHECK-NEXT: br i1 [[NULL_CHECK5]], label %deopt.exit1, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[VAL_A_IDX9:%.*]] = load i32, ptr [[A_IDX_DEREF4]], align 4 -; CHECK-NEXT: [[ZERO_CHECK10:%.*]] = icmp eq i32 [[VAL_A_IDX9]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK10]], label %deopt.exit2, label %for.cond3.lr.ph -; CHECK: for.cond3.lr.ph: -; CHECK-NEXT: [[FOR_CHECK14:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK14]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond2.deopt.exit2_crit_edge, label %for.cond3 -; CHECK: for.cond3: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond3.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[IDX_NEXT:%.*]] = add nuw nsw i64 {{%.*}}, 1 -; CHECK: [[NULL_CHECK:%.*]] = icmp eq ptr {{%.*}}, null -; CHECK-NEXT: br i1 [[NULL_CHECK]], label %for.cond1.deopt.exit1_crit_edge, label %for.cond2 -; CHECK: for.cond3.return_crit_edge: -; CHECK-NEXT: [[SPLIT18:%.*]] = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit1_crit_edge: -; CHECK-NEXT: br label %deopt.exit1 -; CHECK: deopt.exit1: -; CHECK-NEXT: [[DEOPT_VAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL1]] -; CHECK: for.cond2.deopt.exit2_crit_edge: -; CHECK-NEXT: [[SPLIT:%.*]] = phi i32 -; CHECK-NEXT: br label %deopt.exit2 -; CHECK: deopt.exit2: -; CHECK-NEXT: [[VAL_A_IDX_LCSSA:%.*]] = phi i32 -; CHECK-NEXT: [[DEOPT_VAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[VAL_A_IDX_LCSSA]]) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL2]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds ptr, ptr %a, i64 %idx - %a.idx.deref = load ptr, ptr %a.idx - %null.check = icmp eq ptr %a.idx.deref, null - br i1 %null.check, label %deopt.exit1, label %for.cond2 - -for.cond2: - %val.a.idx = load i32, ptr %a.idx.deref, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit2, label %for.cond3 - -for.cond3: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit1: - %deopt.val1 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] - ret i32 %deopt.val1 -deopt.exit2: - %deopt.val2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val2 -} diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll deleted file mode 100644 index 748700c..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll +++ /dev/null @@ -1,236 +0,0 @@ -; RUN: opt -S -passes=loop-rotate < %s -verify-loop-info -verify-dom-info -verify-memoryssa | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -; PR7447 -define i32 @test1(ptr nocapture %a) nounwind readonly { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond1, %entry - %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ] - %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ] - br i1 %i.0, label %for.cond1, label %return - -for.cond1: ; preds = %for.cond, %land.rhs - %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ] - %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ] - %cmp2 = icmp ult i32 %i.1, 100 - br i1 %cmp2, label %land.rhs, label %for.cond - -land.rhs: ; preds = %for.cond1 - %conv = zext i32 %i.1 to i64 - %arrayidx = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 %conv - %0 = load i32, ptr %arrayidx, align 4 - %add = add i32 %0, %sum.1 - %cmp4 = icmp ugt i32 %add, 1000 - %inc = add i32 %i.1, 1 - br i1 %cmp4, label %return, label %for.cond1 - -return: ; preds = %for.cond, %land.rhs - %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ] - ret i32 %retval.0 - -; CHECK-LABEL: @test1( -; CHECK: for.cond1.preheader: -; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ] -; CHECK: br label %for.cond1 - -; CHECK: for.cond1: -; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ] -; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ] -; CHECK: %cmp2 = icmp ult i32 %i.1, 100 -; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit -} - -define void @test2(i32 %x) nounwind { -entry: - br label %for.cond - -for.cond: ; preds = %if.end, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] - %cmp = icmp eq i32 %i.0, %x - br i1 %cmp, label %return.loopexit, label %for.body - -for.body: ; preds = %for.cond - %call = tail call i32 @foo(i32 %i.0) nounwind - %tobool = icmp eq i32 %call, 0 - br i1 %tobool, label %if.end, label %a - -if.end: ; preds = %for.body - %call1 = tail call i32 @foo(i32 42) nounwind - %inc = add i32 %i.0, 1 - br label %for.cond - -a: ; preds = %for.body - %call2 = tail call i32 @bar(i32 1) nounwind - br label %return - -return.loopexit: ; preds = %for.cond - br label %return - -return: ; preds = %return.loopexit, %a - ret void - -; CHECK-LABEL: @test2( -; CHECK: if.end: -; CHECK: %inc = add i32 %i.02, 1 -; CHECK: %cmp = icmp eq i32 %inc, %x -; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body -} - -declare i32 @foo(i32) - -declare i32 @bar(i32) - -@_ZTIi = external constant ptr - -; Verify dominators. -define void @test3(i32 %x) personality ptr @__gxx_personality_v0 { -entry: - %cmp2 = icmp eq i32 0, %x - br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.lr.ph, %for.inc - %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - invoke void @_Z3fooi(i32 %i.03) - to label %for.inc unwind label %lpad - -for.inc: ; preds = %for.body - %inc = add i32 %i.03, 1 - %cmp = icmp eq i32 %inc, %x - br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body - -lpad: ; preds = %for.body - %0 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %1 = extractvalue { ptr, i32 } %0, 0 - %2 = extractvalue { ptr, i32 } %0, 1 - %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind - %matches = icmp eq i32 %2, %3 - br i1 %matches, label %catch, label %eh.resume - -catch: ; preds = %lpad - %4 = tail call ptr @__cxa_begin_catch(ptr %1) nounwind - br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph - -for.body.i.lr.ph: ; preds = %catch - br label %for.body.i - -for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i - %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ] - invoke void @_Z3fooi(i32 %i.0.i1) - to label %for.inc.i unwind label %lpad.i - -for.inc.i: ; preds = %for.body.i - %inc.i = add i32 %i.0.i1, 1 - %cmp.i = icmp eq i32 %inc.i, 0 - br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i - -lpad.i: ; preds = %for.body.i - %5 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %6 = extractvalue { ptr, i32 } %5, 0 - %7 = extractvalue { ptr, i32 } %5, 1 - %matches.i = icmp eq i32 %7, %3 - br i1 %matches.i, label %catch.i, label %lpad1.body - -catch.i: ; preds = %lpad.i - %8 = tail call ptr @__cxa_begin_catch(ptr %6) nounwind - invoke void @test3(i32 0) - to label %invoke.cont2.i unwind label %lpad1.i - -invoke.cont2.i: ; preds = %catch.i - tail call void @__cxa_end_catch() nounwind - br label %invoke.cont2 - -lpad1.i: ; preds = %catch.i - %9 = landingpad { ptr, i32 } - cleanup - %10 = extractvalue { ptr, i32 } %9, 0 - %11 = extractvalue { ptr, i32 } %9, 1 - tail call void @__cxa_end_catch() nounwind - br label %lpad1.body - -for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i - br label %invoke.cont2.loopexit - -invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch - br label %invoke.cont2 - -invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i - tail call void @__cxa_end_catch() nounwind - br label %try.cont - -for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc - br label %try.cont.loopexit - -try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry - br label %try.cont - -try.cont: ; preds = %try.cont.loopexit, %invoke.cont2 - ret void - -lpad1.body: ; preds = %lpad1.i, %lpad.i - %exn.slot.0.i = phi ptr [ %10, %lpad1.i ], [ %6, %lpad.i ] - %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ] - tail call void @__cxa_end_catch() nounwind - br label %eh.resume - -eh.resume: ; preds = %lpad1.body, %lpad - %exn.slot.0 = phi ptr [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ] - %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ] - %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn.slot.0, 0 - %lpad.val5 = insertvalue { ptr, i32 } %lpad.val, i32 %ehselector.slot.0, 1 - resume { ptr, i32 } %lpad.val5 -} - -declare void @_Z3fooi(i32) - -declare i32 @__gxx_personality_v0(...) - -declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone - -declare ptr @__cxa_begin_catch(ptr) - -declare void @__cxa_end_catch() - -define void @test4(i1 %arg) nounwind uwtable { -entry: - br label %"7" - -"3": ; preds = %"7" - br i1 %arg, label %"31", label %"4" - -"4": ; preds = %"3" - %. = select i1 undef, float 0x3F50624DE0000000, float undef - %0 = add i32 %1, 1 - br label %"7" - -"7": ; preds = %"4", %entry - %1 = phi i32 [ %0, %"4" ], [ 0, %entry ] - %2 = icmp slt i32 %1, 100 - br i1 %2, label %"3", label %"8" - -"8": ; preds = %"7" - br i1 %arg, label %"9", label %"31" - -"9": ; preds = %"8" - br label %"33" - -"27": ; preds = %"31" - unreachable - -"31": ; preds = %"8", %"3" - br i1 %arg, label %"27", label %"32" - -"32": ; preds = %"31" - br label %"33" - -"33": ; preds = %"32", %"9" - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index f5329cf..c225ede5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -580,6 +580,201 @@ exit: ret double %accum } +define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %src, ptr noalias %src.2, ptr noalias %dst) #0 { +; I64-LABEL: define void @loaded_address_used_by_load_through_blend( +; I64-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I64-NEXT: [[ENTRY:.*]]: +; I64-NEXT: br label %[[LOOP_HEADER:.*]] +; I64: [[LOOP_HEADER]]: +; I64-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; I64-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_LATCH]] ] +; I64-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1 +; I64-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]] +; I64-NEXT: [[L_SRC:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; I64-NEXT: [[C:%.*]] = fcmp oeq float [[L_SRC]], 0.000000e+00 +; I64-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; I64: [[THEN]]: +; I64-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV_1]], [[START]] +; I64-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[IV_MUL]] +; I64-NEXT: br label %[[LOOP_LATCH]] +; I64: [[LOOP_LATCH]]: +; I64-NEXT: [[MERGE_GEP:%.*]] = phi ptr [ [[GEP_SRC_2]], %[[THEN]] ], [ [[SRC_2]], %[[LOOP_HEADER]] ] +; I64-NEXT: [[L_2:%.*]] = load float, ptr [[MERGE_GEP]], align 4 +; I64-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; I64-NEXT: store float [[L_2]], ptr [[GEP_DST]], align 4 +; I64-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; I64-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 +; I64-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 100 +; I64-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] +; I64: [[EXIT]]: +; I64-NEXT: ret void +; +; I32-LABEL: define void @loaded_address_used_by_load_through_blend( +; I32-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I32-NEXT: [[ENTRY:.*:]] +; I32-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1 +; I32-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 100) +; I32-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]] +; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8 +; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; I32: [[VECTOR_PH]]: +; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 +; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]] +; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; I32-NEXT: br label %[[VECTOR_BODY:.*]] +; I32: [[VECTOR_BODY]]: +; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 +; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 +; I32-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 4 +; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 +; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 +; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7 +; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1 +; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1 +; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1 +; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1 +; I32-NEXT: [[TMP15:%.*]] = add i64 [[TMP7]], 1 +; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1 +; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1 +; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1 +; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0 +; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1 +; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2 +; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3 +; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4 +; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5 +; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6 +; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7 +; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]] +; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] +; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; I32-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]] +; I32-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]] +; I32-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]] +; I32-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]] +; I32-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] +; I32-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP27]], align 4 +; I32-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4 +; I32-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4 +; I32-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4 +; I32-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4 +; I32-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4 +; I32-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4 +; I32-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4 +; I32-NEXT: [[TMP43:%.*]] = insertelement <8 x float> poison, float [[TMP35]], i32 0 +; I32-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP36]], i32 1 +; I32-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP37]], i32 2 +; I32-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP38]], i32 3 +; I32-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP39]], i32 4 +; I32-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP40]], i32 5 +; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6 +; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7 +; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer +; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]] +; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0 +; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]] +; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1 +; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]] +; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2 +; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]] +; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3 +; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]] +; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4 +; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]] +; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5 +; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]] +; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6 +; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]] +; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7 +; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]] +; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0 +; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1 +; I32-NEXT: [[TMP71:%.*]] = insertelement <8 x ptr> [[TMP70]], ptr [[TMP58]], i32 2 +; I32-NEXT: [[TMP72:%.*]] = insertelement <8 x ptr> [[TMP71]], ptr [[TMP60]], i32 3 +; I32-NEXT: [[TMP73:%.*]] = insertelement <8 x ptr> [[TMP72]], ptr [[TMP62]], i32 4 +; I32-NEXT: [[TMP74:%.*]] = insertelement <8 x ptr> [[TMP73]], ptr [[TMP64]], i32 5 +; I32-NEXT: [[TMP75:%.*]] = insertelement <8 x ptr> [[TMP74]], ptr [[TMP66]], i32 6 +; I32-NEXT: [[TMP76:%.*]] = insertelement <8 x ptr> [[TMP75]], ptr [[TMP68]], i32 7 +; I32-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP51]], <8 x ptr> [[TMP76]], <8 x ptr> [[BROADCAST_SPLAT2]] +; I32-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 0 +; I32-NEXT: [[TMP78:%.*]] = load float, ptr [[TMP77]], align 4 +; I32-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 1 +; I32-NEXT: [[TMP80:%.*]] = load float, ptr [[TMP79]], align 4 +; I32-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 2 +; I32-NEXT: [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4 +; I32-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 3 +; I32-NEXT: [[TMP84:%.*]] = load float, ptr [[TMP83]], align 4 +; I32-NEXT: [[TMP85:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 4 +; I32-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP85]], align 4 +; I32-NEXT: [[TMP87:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 5 +; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 +; I32-NEXT: [[TMP89:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 6 +; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4 +; I32-NEXT: [[TMP91:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 7 +; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4 +; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; I32-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; I32-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; I32-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; I32-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; I32-NEXT: store float [[TMP78]], ptr [[TMP93]], align 4 +; I32-NEXT: store float [[TMP80]], ptr [[TMP94]], align 4 +; I32-NEXT: store float [[TMP82]], ptr [[TMP95]], align 4 +; I32-NEXT: store float [[TMP84]], ptr [[TMP96]], align 4 +; I32-NEXT: store float [[TMP86]], ptr [[TMP97]], align 4 +; I32-NEXT: store float [[TMP88]], ptr [[TMP98]], align 4 +; I32-NEXT: store float [[TMP90]], ptr [[TMP99]], align 4 +; I32-NEXT: store float [[TMP92]], ptr [[TMP100]], align 4 +; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; I32-NEXT: [[TMP101:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; I32-NEXT: br i1 [[TMP101]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; I32: [[MIDDLE_BLOCK]]: +; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; I32-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; I32: [[SCALAR_PH]]: +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.2 = phi i64 [ %start, %entry ], [ %iv.2.next, %loop.latch ] + %iv.1 = add i64 %iv, 1 + %gep.src = getelementptr i8, ptr %src, i64 %iv.1 + %l.src = load float, ptr %gep.src, align 4 + %c = fcmp oeq float %l.src, 0.000000e+00 + br i1 %c, label %then, label %loop.latch + +then: + %iv.mul = mul i64 %iv.1, %start + %gep.src.2 = getelementptr i8, ptr %src.2, i64 %iv.mul + br label %loop.latch + +loop.latch: + %merge.gep = phi ptr [ %gep.src.2, %then ], [ %src.2, %loop.header ] + %l.2 = load float, ptr %merge.gep, align 4 + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store float %l.2, ptr %gep.dst, align 4 + %iv.next = add i64 %iv, 1 + %iv.2.next = add i64 %iv.2, -1 + %ec = icmp sgt i64 %iv.2, 100 + br i1 %ec, label %loop.header, label %exit + +exit: + ret void +} + +attributes #0 = { "target-cpu"="znver3" } attributes #0 = { "target-cpu"="znver2" } !0 = distinct !{!0, !1} diff --git a/llvm/test/Transforms/NewGVN/ptrtoaddr.ll b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll new file mode 100644 index 0000000..e51b42a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=newgvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll index caf7a80..7c9888f 100644 --- a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll +++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll @@ -436,10 +436,11 @@ bb104: ; preds = %bb102 br label %bb105 } +; Make sure the call is inlined. define i8 @test2(i8 %x) { ; CHECK-LABEL: define range(i8 0, 53) i8 @test2( ; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { -; CHECK-NEXT: [[CALL:%.*]] = tail call i8 @test(i8 [[X]]) +; CHECK-NEXT: [[CALL:%.*]] = tail call range(i8 0, 53) i8 @llvm.umin.i8(i8 [[X]], i8 52) ; CHECK-NEXT: ret i8 [[CALL]] ; %call = call i8 @test(i8 %x) diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll index 9bbe3eb..42d3dcc 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll @@ -97,8 +97,8 @@ if.end: ret ptr %x.0 } -!0 = !{!"int"} -!1 = !{!"char[4]"} +!0 = !{!"int", i1 0} +!1 = !{!"char[4]", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll index c9063d3..25267dc 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT +; RUN: opt -S -passes='simplifycfg<switch-to-arithmetic>' < %s | FileCheck %s --check-prefix=OPTNOLUT ; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg<switch-to-lookup>' < %s | FileCheck %s --check-prefix=TTINOLUT %} ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @linear_transform_with_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_with_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[END:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[CASE0:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE0]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 4 +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 +; OPTNOLUT-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_with_default( @@ -138,26 +126,8 @@ end: define i32 @linear_transform_no_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_no_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: i32 4, label %[[CASE4:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE4]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: unreachable -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 ; OPTNOLUT-NEXT: ret i32 [[SWITCH_IDX_MULT]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_no_default( @@ -350,18 +320,9 @@ end: define i32 @single_value_withdefault(i32 %x) { ; OPTNOLUT-LABEL: define i32 @single_value_withdefault( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[DOT]] ; ; TTINOLUT-LABEL: define i32 @single_value_withdefault( @@ -401,18 +362,9 @@ end: define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" { ; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables( ; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables( @@ -449,6 +401,60 @@ end: ret i32 %idx } +define i1 @single_value_with_mask(i32 %x) { +; OPTNOLUT-LABEL: define i1 @single_value_with_mask( +; OPTNOLUT-SAME: i32 [[X:%.*]]) { +; OPTNOLUT-NEXT: [[ENTRY:.*]]: +; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ +; OPTNOLUT-NEXT: i32 18, label %[[END:.*]] +; OPTNOLUT-NEXT: i32 21, label %[[END]] +; OPTNOLUT-NEXT: i32 48, label %[[END]] +; OPTNOLUT-NEXT: i32 16, label %[[END]] +; OPTNOLUT-NEXT: ] +; OPTNOLUT: [[DEFAULT]]: +; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; OPTNOLUT-NEXT: br label %[[END]] +; OPTNOLUT: [[END]]: +; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ] +; OPTNOLUT-NEXT: ret i1 [[RES]] +; +; TTINOLUT-LABEL: define i1 @single_value_with_mask( +; TTINOLUT-SAME: i32 [[X:%.*]]) { +; TTINOLUT-NEXT: [[ENTRY:.*]]: +; TTINOLUT-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16 +; TTINOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33 +; TTINOLUT-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64 +; TTINOLUT-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]] +; TTINOLUT-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1 +; TTINOLUT-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; TTINOLUT-NEXT: br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]] +; TTINOLUT: [[DEFAULT]]: +; TTINOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; TTINOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; TTINOLUT-NEXT: br label %[[END]] +; TTINOLUT: [[END]]: +; TTINOLUT-NEXT: [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ] +; TTINOLUT-NEXT: ret i1 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] + +default: + %cmp = icmp eq i32 %x, 80 + %sel = select i1 %cmp, i1 false, i1 true + br label %end + +end: + %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ] + ret i1 %res +} + define i32 @lookup_table(i32 %x) { ; OPTNOLUT-LABEL: define i32 @lookup_table( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { |