Diffstat (limited to 'llvm/test/Transforms/InstCombine')
13 files changed, 647 insertions, 269 deletions
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll index ecedbdb..abe1ed0 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-to-svbool-binops.ll @@ -124,6 +124,39 @@ define <vscale x 8 x i1> @try_combine_svbool_binop_orr(<vscale x 8 x i1> %a, <vs ret <vscale x 8 x i1> %t3 } +; Verify predicate cast does not hinder "isAllActive" knowledge. +define <vscale x 8 x half> @try_combine_svbool_binop_fadd(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: @try_combine_svbool_binop_fadd( +; CHECK-NEXT: [[T2:%.*]] = fadd <vscale x 8 x half> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret <vscale x 8 x half> [[T2]] +; + %t1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> splat (i1 true)) + %t2 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %t1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %t2 +} + +; Verify predicate cast does not hinder "isAllActive" knowledge. +define <vscale x 4 x float> @try_combine_svbool_binop_fmul(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: @try_combine_svbool_binop_fmul( +; CHECK-NEXT: [[T2:%.*]] = fmul <vscale x 4 x float> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret <vscale x 4 x float> [[T2]] +; + %t1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> splat (i1 true)) + %t2 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %t1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %t2 +} + +; Verify predicate cast does not hinder "isAllActive" knowledge. 
+define <vscale x 2 x double> @try_combine_svbool_binop_fsub(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: @try_combine_svbool_binop_fsub( +; CHECK-NEXT: [[T2:%.*]] = fsub <vscale x 2 x double> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret <vscale x 2 x double> [[T2]] +; + %t1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> splat (i1 true)) + %t2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %t1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %t2 +} + declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>) declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>) declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>) diff --git a/llvm/test/Transforms/InstCombine/bitreverse-hang.ll b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll index bb01299..3f29509 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse-hang.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll @@ -21,7 +21,7 @@ @b = common global i32 0, align 4 ; CHECK: define i32 @fn1 -define i32 @fn1() #0 { +define i32 @fn1() { entry: %b.promoted = load i32, ptr @b, align 4, !tbaa !2 br label %for.body @@ -40,8 +40,6 @@ for.end: ; preds = %for.body ret i32 undef } -attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } - !llvm.module.flags = !{!0} !llvm.ident = !{!1} diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll new file mode 100644 index 0000000..2688540 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll @@ -0,0 +1,156 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=instcombine %s | FileCheck %s +; RUN: opt -S -passes=instcombine %s \ +; RUN: -use-constant-int-for-fixed-length-splat \ +; RUN: -use-constant-fp-for-fixed-length-splat \ +; RUN: -use-constant-int-for-scalable-splat \ +; RUN: -use-constant-fp-for-scalable-splat | FileCheck %s + +define <vscale x 4 x i32> @insert_div() { +; CHECK-LABEL: @insert_div( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0) + %div = udiv <vscale x 4 x i32> %0, splat (i32 3) + ret <vscale x 4 x i32> %div +} + +define <vscale x 4 x i32> @insert_div_splat_lhs() { +; CHECK-LABEL: @insert_div_splat_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 5), <4 x i32> splat (i32 2), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 2), <4 x i32> splat (i32 5), i64 0) + %div = udiv <vscale x 4 x i32> splat (i32 10), %0 + ret <vscale x 4 x i32> %div +} + +define <vscale x 4 x i32> @insert_div_mixed_splat() { +; CHECK-LABEL: 
@insert_div_mixed_splat( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0) + %div = udiv <vscale x 4 x i32> %0, splat (i32 3) + ret <vscale x 4 x i32> %div +} + +define <vscale x 4 x i32> @insert_mul() { +; CHECK-LABEL: @insert_mul( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4) +; CHECK-NEXT: ret <vscale x 4 x i32> [[MUL]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4) + %mul = mul <vscale x 4 x i32> %0, splat (i32 7) + ret <vscale x 4 x i32> %mul +} + +define <vscale x 4 x i32> @insert_add() { +; CHECK-LABEL: @insert_add( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0) + %add = add <vscale x 4 x i32> %0, splat (i32 11) + ret <vscale x 4 x i32> %add +} + +define <vscale x 4 x i32> @insert_add_non_splat_subvector() { +; CHECK-LABEL: @insert_add_non_splat_subvector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 101, i32 102, i32 103, i32 104>, i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0) + %add = add <vscale x 4 x i32> %0, splat (i32 100) + ret <vscale x 4 x i32> %add +} + +define <vscale x 4 x float> @insert_add_fp() { +; CHECK-LABEL: @insert_add_fp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat (float 6.250000e+00), <4 x float> splat (float 5.500000e+00), i64 0) +; CHECK-NEXT: ret <vscale x 4 x float> [[ADD]] +; +entry: + %0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat(float 1.25), <4 x float> splat (float 0.5), i64 0) + %add = fadd <vscale x 4 x float> %0, splat (float 5.0) + ret <vscale x 4 x float> %add +} + +define <vscale x 8 x i32> @insert_add_scalable_subvector() { +; CHECK-LABEL: @insert_add_scalable_subvector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0) +; CHECK-NEXT: ret <vscale x 8 x i32> [[ADD]] +; +entry: + %0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat(i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0) + %add = add <vscale x 8 x i32> %0, splat (i32 4) + ret <vscale x 8 x i32> %add +} + +define <vscale x 4 x i32> @insert_sub() { +; CHECK-LABEL: @insert_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8) +; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]] +; +entry: + %0 = call 
<vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8) + %sub = add <vscale x 4 x i32> %0, splat (i32 -11) + ret <vscale x 4 x i32> %sub +} + +define <vscale x 4 x i32> @insert_and_partially_undef() { +; CHECK-LABEL: @insert_and_partially_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[AND]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0) + %and = and <vscale x 4 x i32> %0, splat (i32 4) + ret <vscale x 4 x i32> %and +} + +define <vscale x 4 x i32> @insert_fold_chain() { +; CHECK-LABEL: @insert_fold_chain( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0) +; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0) + %div = udiv <vscale x 4 x i32> %0, splat (i32 3) + %add = add <vscale x 4 x i32> %div, splat (i32 4) + ret <vscale x 4 x i32> %add +} + +; TODO: This could be folded more. +define <vscale x 4 x i32> @insert_add_both_insert_vector() { +; CHECK-LABEL: @insert_add_both_insert_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0) +; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]] +; +entry: + %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 10), <4 x i32> splat (i32 5), i64 0) + %1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 -1), <4 x i32> splat (i32 2), i64 0) + %add = add <vscale x 4 x i32> %0, %1 + ret <vscale x 4 x i32> %add +} diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll new file mode 100644 index 0000000..871fb34 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +; ctlz(~i & (i - 1)) -> bitwidth - cttz(i, false) +define i8 @ctlz_to_sub_bw_cttz(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_poison(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_poison( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + 
%clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_different_add(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_different_add( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], 1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, 1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_different_xor(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_different_xor( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, 1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +declare void @use(i8) + +define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_dec( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: call void @use(i8 [[DEC]]) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + call void @use(i8 %dec) + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_multi_use_not(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_not( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: call void @use(i8 [[NOT]]) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + call void @use(i8 %not) + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_multi_use_and(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_and( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + call void @use(i8 %and) + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_commute_and( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %not, %dec + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define <2 x i8> 
@ctlz_to_sub_bw_cttz_vec_splat(<2 x i8> %a0) { +; CHECK-LABEL: define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat( +; CHECK-SAME: <2 x i8> [[A0:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw <2 x i8> splat (i8 8), [[TMP1]] +; CHECK-NEXT: ret <2 x i8> [[CLZ]] +; + %dec = add <2 x i8> %a0, <i8 -1, i8 -1> + %not = xor <2 x i8> %a0, <i8 -1, i8 -1> + %and = and <2 x i8> %dec, %not + %clz = tail call <2 x i8>@llvm.ctlz.v2i8(<2 x i8> %and, i1 false) + ret <2 x i8> %clz +} diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll index 2f1f9fc..fc9ab9f 100644 --- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll +++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll @@ -222,8 +222,7 @@ declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) define i32 @vec_to_scalar_select_scalar(i1 %b) { ; CHECK-LABEL: @vec_to_scalar_select_scalar( -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4> -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[S]]) +; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 3, i32 7 ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4> @@ -371,3 +370,36 @@ define float @test_fabs_select_multiuse_both_constant(i1 %cond, float %x) { %fabs = call float @llvm.fabs.f32(float %select) ret float %fabs } + +; Negative test: Don't replace with select between vector mask and zeroinitializer. +define <16 x i1> @test_select_of_active_lane_mask_bound(i64 %base, i64 %n, i1 %cond) { +; CHECK-LABEL: @test_select_of_active_lane_mask_bound( +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i64 [[N:%.*]], i64 0 +; CHECK-NEXT: [[MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[BASE:%.*]], i64 [[S]]) +; CHECK-NEXT: ret <16 x i1> [[MASK]] +; + %s = select i1 %cond, i64 %n, i64 0 + %mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %s) + ret <16 x i1> %mask +} + +define <16 x i1> @test_select_of_active_lane_mask_bound_both_constant(i64 %base, i64 %n, i1 %cond) { +; CHECK-LABEL: @test_select_of_active_lane_mask_bound_both_constant( +; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], <16 x i1> splat (i1 true), <16 x i1> zeroinitializer +; CHECK-NEXT: ret <16 x i1> [[MASK]] +; + %s = select i1 %cond, i64 16, i64 0 + %mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 0, i64 %s) + ret <16 x i1> %mask +} + +define { i64, i1 } @test_select_of_overflow_intrinsic_operand(i64 %n, i1 %cond) { +; CHECK-LABEL: @test_select_of_overflow_intrinsic_operand( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[N:%.*]], i64 42) +; CHECK-NEXT: [[ADD_OVERFLOW:%.*]] = select i1 [[COND:%.*]], { i64, i1 } [[TMP1]], { i64, i1 } { i64 42, i1 false } +; CHECK-NEXT: ret { i64, i1 } [[ADD_OVERFLOW]] +; + %s = select i1 %cond, i64 %n, i64 0 + %add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s, i64 42) + ret { i64, i1 } %add_overflow +} diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index 3454835..1c8b21c 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -3026,6 +3026,56 @@ join: ret i32 %umax } +define i32 @cross_lane_intrinsic_over_phi(i1 %c, i1 %c2, <4 x i32> %a) { +; CHECK-LABEL: @cross_lane_intrinsic_over_phi( +; CHECK-NEXT: entry: +; 
CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A:%.*]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @may_exit() +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + br label %join + +join: + %phi = phi <4 x i32> [ %a, %if ], [ zeroinitializer, %entry ] + call void @may_exit() + %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %phi) + ret i32 %sum +} + +define { i64, i1 } @overflow_intrinsic_over_phi(i1 %c, i64 %a) { +; CHECK-LABEL: @overflow_intrinsic_over_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 1) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi { i64, i1 } [ [[TMP0]], [[IF]] ], [ { i64 1, i1 false }, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @may_exit() +; CHECK-NEXT: ret { i64, i1 } [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + br label %join + +join: + %phi = phi i64 [ %a, %if ], [ 0, %entry ] + call void @may_exit() + %add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %phi, i64 1) + ret { i64, i1 } %add_overflow +} + define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) { ; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index 410c43c..f19cca8 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -40,3 +40,134 @@ define i128 @ptrtoaddr_sext(ptr %p) { %ext = sext i64 %p.addr to i128 ret i128 %ext } + +define i64 @sub_ptrtoaddr(ptr %p, i64 %offset) { +; CHECK-LABEL: define i64 @sub_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: ret i64 [[OFFSET]] +; + %p2 = getelementptr i8, ptr %p, i64 %offset + %p.addr = ptrtoaddr ptr %p to i64 + %p2.addr = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %p2.addr, %p.addr + ret i64 %sub +} + +define i64 @sub_ptrtoint_ptrtoaddr(ptr %p, i64 %offset) { +; CHECK-LABEL: define i64 @sub_ptrtoint_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: ret i64 [[OFFSET]] +; + %p2 = getelementptr i8, ptr %p, i64 %offset + %p.int = ptrtoint ptr %p to i64 + %p2.addr = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %p2.addr, %p.int + ret i64 %sub +} + +define i32 @sub_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) { +; CHECK-LABEL: define i32 @sub_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: ret i32 [[OFFSET]] +; + %p2 = getelementptr i8, ptr addrspace(1) %p, i32 %offset + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %sub = sub i32 %p2.addr, %p.addr + ret i32 %sub +} + +define i32 @sub_trunc_ptrtoaddr(ptr %p, i64 %offset) { +; CHECK-LABEL: define i32 @sub_trunc_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[SUB:%.*]] = trunc i64 [[OFFSET]] to i32 +; CHECK-NEXT: ret i32 [[SUB]] +; + %p2 = getelementptr i8, ptr %p, i64 %offset + %p.addr = ptrtoaddr ptr %p to i64 + %p2.addr = ptrtoaddr ptr %p2 to i64 + %p.addr.trunc = trunc i64 %p.addr to i32 + %p2.addr.trunc = trunc i64 %p2.addr to 
i32 + %sub = sub i32 %p2.addr.trunc, %p.addr.trunc + ret i32 %sub +} + +define i16 @sub_trunc_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) { +; CHECK-LABEL: define i16 @sub_trunc_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[SUB:%.*]] = trunc i32 [[OFFSET]] to i16 +; CHECK-NEXT: ret i16 [[SUB]] +; + %p2 = getelementptr i8, ptr addrspace(1) %p, i32 %offset + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %p.addr.trunc = trunc i32 %p.addr to i16 + %p2.addr.trunc = trunc i32 %p2.addr to i16 + %sub = sub i16 %p2.addr.trunc, %p.addr.trunc + ret i16 %sub +} + +define i16 @sub_trunc_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) { +; CHECK-LABEL: define i16 @sub_trunc_ptrtoint_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[SUB:%.*]] = trunc i32 [[OFFSET]] to i16 +; CHECK-NEXT: ret i16 [[SUB]] +; + %p2 = getelementptr i8, ptr addrspace(1) %p, i32 %offset + %p.int = ptrtoint ptr addrspace(1) %p to i64 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %p.int.trunc = trunc i64 %p.int to i16 + %p2.addr.trunc = trunc i32 %p2.addr to i16 + %sub = sub i16 %p2.addr.trunc, %p.int.trunc + ret i16 %sub +} + +define i128 @sub_zext_ptrtoaddr(ptr %p, i64 %offset) { +; CHECK-LABEL: define i128 @sub_zext_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[SUB:%.*]] = zext i64 [[OFFSET]] to i128 +; CHECK-NEXT: ret i128 [[SUB]] +; + %p2 = getelementptr nuw i8, ptr %p, i64 %offset + %p.addr = ptrtoaddr ptr %p to i64 + %p2.addr = ptrtoaddr ptr %p2 to i64 + %p.addr.ext = zext i64 %p.addr to i128 + %p2.addr.ext = zext i64 %p2.addr to i128 + %sub = sub i128 %p2.addr.ext, %p.addr.ext + ret i128 %sub +} + +define i64 @sub_zext_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) { +; CHECK-LABEL: define i64 @sub_zext_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[OFFSET]] to i64 +; CHECK-NEXT: ret i64 [[SUB]] +; + %p2 = getelementptr nuw i8, ptr addrspace(1) %p, i32 %offset + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %p.addr.ext = zext i32 %p.addr to i64 + %p2.addr.ext = zext i32 %p2.addr to i64 + %sub = sub i64 %p2.addr.ext, %p.addr.ext + ret i64 %sub +} + +define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offset) { +; CHECK-LABEL: define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i8, ptr addrspace(1) [[P]], i32 [[OFFSET]] +; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 +; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32 +; CHECK-NEXT: [[P_INT_EXT:%.*]] = zext i64 [[P_INT]] to i128 +; CHECK-NEXT: [[P2_ADDR_EXT:%.*]] = zext i32 [[P2_ADDR]] to i128 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i128 [[P2_ADDR_EXT]], [[P_INT_EXT]] +; CHECK-NEXT: ret i128 [[SUB]] +; + %p2 = getelementptr nuw i8, ptr addrspace(1) %p, i32 %offset + %p.int = ptrtoint ptr addrspace(1) %p to i64 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %p.int.ext = zext i64 %p.int to i128 + %p2.addr.ext = zext i32 %p2.addr to i128 + %sub = sub i128 %p2.addr.ext, %p.int.ext + ret i128 %sub +} diff --git a/llvm/test/Transforms/InstCombine/scmp.ll b/llvm/test/Transforms/InstCombine/scmp.ll index c0be5b9..2ae062cd 100644 --- 
a/llvm/test/Transforms/InstCombine/scmp.ll +++ b/llvm/test/Transforms/InstCombine/scmp.ll @@ -519,9 +519,7 @@ define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) { define i32 @scmp_sgt_slt(i32 %a) { ; CHECK-LABEL: define i32 @scmp_sgt_slt( ; CHECK-SAME: i32 [[A:%.*]]) { -; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr i32 [[A]], 31 -; CHECK-NEXT: [[CMP_INV:%.*]] = icmp slt i32 [[A]], 1 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP_INV]], i32 [[A_LOBIT]], i32 1 +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %cmp = icmp sgt i32 %a, 0 @@ -747,3 +745,55 @@ define i8 @scmp_from_select_eq_and_gt_neg3(i32 %x, i32 %y) { %r = select i1 %eq, i8 0, i8 %sel1 ret i8 %r } + +define i32 @scmp_ashr(i32 %a) { +; CHECK-LABEL: define i32 @scmp_ashr( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[RETVAL_0:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0) +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; + %a.lobit = ashr i32 %a, 31 + %cmp.inv = icmp slt i32 %a, 1 + %retval.0 = select i1 %cmp.inv, i32 %a.lobit, i32 1 + ret i32 %retval.0 +} + +; select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) +define i8 @scmp_ashr_sgt_pattern(i8 %a) { +; CHECK-LABEL: define i8 @scmp_ashr_sgt_pattern( +; CHECK-SAME: i8 [[A:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i8(i8 [[A]], i8 0) +; CHECK-NEXT: ret i8 [[R]] +; + %a.lobit = ashr i8 %a, 7 + %cmp = icmp sgt i8 %a, 0 + %retval = select i1 %cmp, i8 1, i8 %a.lobit + ret i8 %retval +} + +; select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0) +define i8 @scmp_ashr_slt_pattern(i8 %a) { +; CHECK-LABEL: define i8 @scmp_ashr_slt_pattern( +; CHECK-SAME: i8 [[A:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.scmp.i8.i8(i8 [[A]], i8 0) +; CHECK-NEXT: ret i8 [[R]] +; + %a.lobit = ashr i8 %a, 7 + %cmp = icmp slt i8 %a, 1 + %retval = select i1 %cmp, i8 %a.lobit, i8 1 + ret i8 %retval +} + +define i8 @scmp_ashr_slt_pattern_neg(i8 %a) { +; CHECK-LABEL: define i8 @scmp_ashr_slt_pattern_neg( +; CHECK-SAME: i8 [[A:%.*]]) { +; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr i8 [[A]], 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A]], 1 +; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[CMP]], i8 [[A_LOBIT]], i8 1 +; CHECK-NEXT: ret i8 [[RETVAL]] +; + %a.lobit = ashr i8 %a, 4 + %cmp = icmp slt i8 %a, 1 + %retval = select i1 %cmp, i8 %a.lobit, i8 1 + ret i8 %retval +} diff --git a/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll b/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll index 2348490..2d06f42 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll @@ -208,6 +208,3 @@ define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1 %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %r } - - -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll index 621d278..6f80b7d 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=instcombine < %s | 
FileCheck %s -declare void @v4float_user(<4 x float>) #0 +declare void @v4float_user(<4 x float>) -define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) { ; CHECK-LABEL: @extract_one_select( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -17,7 +17,7 @@ define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { } ; Multiple extractelements -define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) { ; CHECK-LABEL: @extract_two_select( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -34,7 +34,7 @@ define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) # } ; Select has an extra non-extractelement user, don't change it -define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) { ; CHECK-LABEL: @extract_one_select_user( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -49,7 +49,7 @@ define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 ret float %extract } -define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @extract_one_vselect_user( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -67,7 +67,7 @@ define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> ; Do not convert the vector select into a scalar select. That would increase ; the instruction count and potentially obfuscate a vector min/max idiom. -define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @extract_one_vselect( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -81,7 +81,7 @@ define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) } ; Multiple extractelements from a vector select -define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @extract_two_vselect( ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]] @@ -100,7 +100,7 @@ define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32 ; The vector selects are not decomposed into scalar selects because that would increase ; the instruction count. Extract+insert is converted to non-lane-crossing shuffles. 
; Test multiple extractelements -define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @simple_vector_select( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i64 0 @@ -232,5 +232,3 @@ define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x %t11 = extractelement <2 x i32> %p, i32 0 ret i32 %t11 } - -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/InstCombine/select_frexp.ll b/llvm/test/Transforms/InstCombine/select_frexp.ll index d025aed..ccfcca1 100644 --- a/llvm/test/Transforms/InstCombine/select_frexp.ll +++ b/llvm/test/Transforms/InstCombine/select_frexp.ll @@ -115,10 +115,10 @@ define float @test_select_frexp_no_const(float %x, float %y, i1 %cond) { define i32 @test_select_frexp_extract_exp(float %x, i1 %cond) { ; CHECK-LABEL: define i32 @test_select_frexp_extract_exp( ; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], float 1.000000e+00, float [[X]] -; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SEL]]) +; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) ; CHECK-NEXT: [[FREXP_1:%.*]] = extractvalue { float, i32 } [[FREXP]], 1 -; CHECK-NEXT: ret i32 [[FREXP_1]] +; CHECK-NEXT: [[FREXP_2:%.*]] = select i1 [[COND]], i32 1, i32 [[FREXP_1]] +; CHECK-NEXT: ret i32 [[FREXP_2]] ; %sel = select i1 %cond, float 1.000000e+00, float %x %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %sel) @@ -132,7 +132,7 @@ define float @test_select_frexp_fast_math_select(float %x, i1 %cond) { ; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: [[FREXP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) ; CHECK-NEXT: [[MANTISSA:%.*]] = extractvalue { float, i32 } [[FREXP1]], 0 -; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select nnan ninf nsz i1 [[COND]], float 5.000000e-01, float [[MANTISSA]] +; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select i1 [[COND]], float 5.000000e-01, float [[MANTISSA]] ; CHECK-NEXT: ret float [[SELECT_FREXP]] ; %sel = select nnan ninf nsz i1 %cond, float 1.000000e+00, float %x diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll deleted file mode 100644 index 7816781..0000000 --- a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll +++ /dev/null @@ -1,243 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -passes=instcombine | FileCheck %s -@A = extern_weak global float, align 4 - -; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true -; and false values, while %v1 and %phi.to.remove are actually the same. -; Fold the selection instruction %same.as.v1 to %v1. 
-define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) { -; CHECK-LABEL: @select_with_identical_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 -; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] -; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] -; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] -; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 -; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] -; CHECK: exit: -; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] - %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] - %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] - %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] - %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] - %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] - %q.load = load float, ptr %q - %c.load = load float, ptr %c - %sub = fsub float %q.load, %c.load - %cmp1 = fcmp olt float %sub, %v0 - %v0.1 = select i1 %cmp1, float %sub, float %v0 - %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove - %cmp2 = fcmp ogt float %sub, %same.as.v1 - %v1.1 = select i1 %cmp2, float %sub, float %v1 - %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 - %inc.i = add nuw nsw i32 %i, 1 - %q.next = getelementptr inbounds i8, ptr %q, i64 4 - %c.next = getelementptr inbounds i8, ptr %c, i64 4 - %exitcond = icmp eq i32 %inc.i, %count - br i1 %exitcond, label %exit, label %for.body - -exit: - %vl.1.lcssa = phi float [ %v1.1, %for.body ] - store float %vl.1.lcssa, ptr @A - ret void -} - -; The difference from select_with_identical_phi() is that the true and false values in -; %phi.to.remove.next and %v1.1 are swapped. -; Check that %same.as.v1 can be folded. 
-define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) { -; CHECK-LABEL: @select_with_identical_phi_2( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 -; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] -; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] -; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] -; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 -; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] -; CHECK: exit: -; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] - %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] - %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] - %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] - %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] - %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] - %q.load = load float, ptr %q - %c.load = load float, ptr %c - %sub = fsub float %q.load, %c.load - %cmp1 = fcmp olt float %sub, %v0 - %v0.1 = select i1 %cmp1, float %sub, float %v0 - %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove - %cmp2 = fcmp ogt float %sub, %same.as.v1 - %v1.1 = select i1 %cmp2, float %v1, float %sub - %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub - %inc.i = add nuw nsw i32 %i, 1 - %q.next = getelementptr inbounds i8, ptr %q, i64 4 - %c.next = getelementptr inbounds i8, ptr %c, i64 4 - %exitcond = icmp eq i32 %inc.i, %count - br i1 %exitcond, label %exit, label %for.body - -exit: - %vl.1.lcssa = phi float [ %v1.1, %for.body ] - store float %vl.1.lcssa, ptr @A - ret void -} - -; The difference from select_with_identical_phi() is that the true and false values in -; same.as.v1 are swapped. -; Check that %same.as.v1 can be folded. 
-define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) { -; CHECK-LABEL: @select_with_identical_phi_3( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 -; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] -; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] -; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] -; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 -; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] -; CHECK: exit: -; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] - %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] - %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] - %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] - %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] - %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] - %q.load = load float, ptr %q - %c.load = load float, ptr %c - %sub = fsub float %q.load, %c.load - %cmp1 = fcmp olt float %sub, %v0 - %v0.1 = select i1 %cmp1, float %sub, float %v0 - %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 - %cmp2 = fcmp ogt float %sub, %same.as.v1 - %v1.1 = select i1 %cmp2, float %sub, float %v1 - %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 - %inc.i = add nuw nsw i32 %i, 1 - %q.next = getelementptr inbounds i8, ptr %q, i64 4 - %c.next = getelementptr inbounds i8, ptr %c, i64 4 - %exitcond = icmp eq i32 %inc.i, %count - br i1 %exitcond, label %exit, label %for.body - -exit: - %vl.1.lcssa = phi float [ %v1.1, %for.body ] - store float %vl.1.lcssa, ptr @A - ret void -} - -; The difference from select_with_identical_phi() is that the true and false values in -; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped. -; Check that %same.as.v1 can be folded. 
-define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) { -; CHECK-LABEL: @select_with_identical_phi_4( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 -; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] -; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] -; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] -; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 -; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] -; CHECK: exit: -; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] - %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] - %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] - %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] - %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] - %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] - %q.load = load float, ptr %q - %c.load = load float, ptr %c - %sub = fsub float %q.load, %c.load - %cmp1 = fcmp olt float %sub, %v0 - %v0.1 = select i1 %cmp1, float %sub, float %v0 - %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 - %cmp2 = fcmp ogt float %sub, %same.as.v1 - %v1.1 = select i1 %cmp2, float %v1, float %sub - %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub - %inc.i = add nuw nsw i32 %i, 1 - %q.next = getelementptr inbounds i8, ptr %q, i64 4 - %c.next = getelementptr inbounds i8, ptr %c, i64 4 - %exitcond = icmp eq i32 %inc.i, %count - br i1 %exitcond, label %exit, label %for.body - -exit: - %vl.1.lcssa = phi float [ %v1.1, %for.body ] - store float %vl.1.lcssa, ptr @A - ret void -} diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 8eeaea1..ee70137 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=instcombine < %s | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-p2:32:32" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-p2:32:32-p3:32:32:32:16" define i64 
@test_inbounds(ptr %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds( @@ -505,6 +505,23 @@ define i64 @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw(i32 %offset) { ret i64 %D } +define i64 @negative_zext_ptrtoint_sub_zext_ptrtoint_as2_nuw_truncating(i32 %offset) { +; CHECK-LABEL: @negative_zext_ptrtoint_sub_zext_ptrtoint_as2_nuw_truncating( +; CHECK-NEXT: [[A:%.*]] = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 [[OFFSET:%.*]] +; CHECK-NEXT: [[A_IDX:%.*]] = ptrtoint ptr addrspace(2) [[A]] to i32 +; CHECK-NEXT: [[E:%.*]] = zext i32 [[A_IDX]] to i64 +; CHECK-NEXT: [[D:%.*]] = zext i16 ptrtoint (ptr addrspace(2) @Arr_as2 to i16) to i64 +; CHECK-NEXT: [[E1:%.*]] = sub nsw i64 [[E]], [[D]] +; CHECK-NEXT: ret i64 [[E1]] +; + %A = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset + %B = ptrtoint ptr addrspace(2) %A to i32 + %C = zext i32 %B to i64 + %D = zext i16 ptrtoint (ptr addrspace(2) @Arr_as2 to i16) to i64 + %E = sub i64 %C, %D + ret i64 %E +} + define i64 @ptrtoint_sub_zext_ptrtoint_as2_inbounds_local(ptr addrspace(2) %p, i32 %offset) { ; CHECK-LABEL: @ptrtoint_sub_zext_ptrtoint_as2_inbounds_local( ; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds bfloat, ptr addrspace(2) [[P:%.*]], i32 [[OFFSET:%.*]] @@ -614,6 +631,20 @@ define i64 @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw_local(ptr addrspace(2) % ret i64 %D } +define i64 @zext_ptrtoint_sub_ptrtoint_as3_nuw_local(ptr addrspace(3) %p, i16 %offset) { +; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as3_nuw_local( +; CHECK-NEXT: [[SUB:%.*]] = zext i16 [[GEP_IDX:%.*]] to i64 +; CHECK-NEXT: ret i64 [[SUB]] +; + %gep = getelementptr nuw i8, ptr addrspace(3) %p, i16 %offset + %gep.int = ptrtoint ptr addrspace(3) %gep to i32 + %p.int = ptrtoint ptr addrspace(3) %p to i32 + %gep.int.ext = zext i32 %gep.int to i64 + %p.int.ext = zext i32 %p.int to i64 + %sub = sub i64 %gep.int.ext, %p.int.ext + ret i64 %sub +} + define i64 @test30(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 |
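For reference, the headline fold exercised by the new ctlz-cttz.ll tests can be reproduced in isolation. A minimal sketch (the function name @trailing_mask_clz is illustrative, not part of the patch); running it through opt -passes=instcombine should yield the "sub nuw nsw i8 8, cttz" form shown in the CHECK lines above:

define i8 @trailing_mask_clz(i8 %x) {
  ; ~x & (x - 1) sets exactly the bits below the lowest set bit of %x,
  ; so counting its leading zeros is equivalent to 8 - cttz(%x).
  %dec = add i8 %x, -1
  %not = xor i8 %x, -1
  %mask = and i8 %dec, %not
  %clz = call i8 @llvm.ctlz.i8(i8 %mask, i1 false)
  ret i8 %clz
}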
