aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r--llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll4
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll40
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll45
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll69
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll141
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll98
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll41
-rw-r--r--llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll88
-rw-r--r--llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll27
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll121
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll79
-rw-r--r--llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll84
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll126
-rw-r--r--llvm/test/Transforms/SCCP/relax-range-checks.ll24
-rw-r--r--llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll12
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll4
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll32
-rw-r--r--llvm/test/Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll55
-rw-r--r--llvm/test/Transforms/SimplifyCFG/indirectbr.ll32
-rw-r--r--llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll12
-rw-r--r--llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll213
21 files changed, 1266 insertions, 81 deletions
diff --git a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
index d0e7c1c2..e1e1611 100644
--- a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
+++ b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
@@ -80,8 +80,8 @@ cleanup2:
; CHECK: cleanup2.corodispatch:
; CHECK: %1 = phi i8 [ 0, %handler2 ], [ 1, %catch.dispatch.2 ]
; CHECK: %2 = cleanuppad within %h1 []
-; CHECK: %switch = icmp ult i8 %1, 1
-; CHECK: br i1 %switch, label %cleanup2.from.handler2, label %cleanup2.from.catch.dispatch.2
+; CHECK: %3 = icmp eq i8 %1, 0
+; CHECK: br i1 %3, label %cleanup2.from.handler2, label %cleanup2.from.catch.dispatch.2
; CHECK: cleanup2.from.handler2:
; CHECK: %valueB.reload = load i32, ptr %valueB.spill.addr, align 4
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll
new file mode 100644
index 0000000..bcdf75b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call to a library function which is not marked as
+; NoCallback. Function bob() does not have internal linkage and hence prevents
+; norecurse to be added.
+
+@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+
+;.
+; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+;.
+define dso_local void @bob() {
+; CHECK-LABEL: define dso_local void @bob() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+ ret void
+}
+
+declare i32 @printf(ptr readonly captures(none), ...)
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll
new file mode 100644
index 0000000..a03b4ca
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call to a library function which is not marked as
+; NoCallback. All functions except main() are internal and main is marked
+; norecurse, so as to not block norecurse to be added to bob().
+
+@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+
+; Function Attrs: nofree noinline nounwind uwtable
+;.
+; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1
+;.
+define internal void @bob() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @bob(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str)
+ ret void
+}
+
+; Function Attrs: nofree nounwind
+declare i32 @printf(ptr readonly captures(none), ...)
+
+; Function Attrs: nofree norecurse nounwind uwtable
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll
new file mode 100644
index 0000000..5be707b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph which has a recursive function(foo2) which
+; calls a non-recursive internal function (foo3) satisfying the norecurse
+; attribute criteria.
+
+
+define internal void @foo3() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @foo3(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+define internal i32 @foo2(i32 %accum, i32 %n) {
+; CHECK-LABEL: define internal i32 @foo2(
+; CHECK-SAME: i32 [[ACCUM:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[RECURSE:.*]]
+; CHECK: [[RECURSE]]:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ACCUM]], [[SUB]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo2(i32 [[MUL]], i32 [[SUB]])
+; CHECK-NEXT: call void @foo3()
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ACCUM]], %[[ENTRY]] ], [ [[CALL]], %[[RECURSE]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cmp = icmp eq i32 %n, 0
+ br i1 %cmp, label %exit, label %recurse
+
+recurse:
+ %sub = sub i32 %n, 1
+ %mul = mul i32 %accum, %sub
+ %call = call i32 @foo2(i32 %mul, i32 %sub)
+ call void @foo3()
+ br label %exit
+
+exit:
+ %res = phi i32 [ %accum, %entry ], [ %call, %recurse ]
+ ret i32 %res
+}
+
+define internal i32 @foo1() {
+; CHECK-LABEL: define internal i32 @foo1() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @foo2(i32 1, i32 5)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @foo2(i32 1, i32 5)
+ ret i32 %res
+}
+
+define dso_local i32 @main() {
+; CHECK-LABEL: define dso_local i32 @main() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @foo1()
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @foo1()
+ ret i32 %res
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll
new file mode 100644
index 0000000..e351f60
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is not added when a function is part of non-singular
+; SCC.
+; There are three different SCCs in this test:
+; SCC#1: f1, foo, bar, foo1, bar1
+; SCC#2: bar2, bar3, bar4
+; SCC#3: baz, fun
+; None of these functions should be marked as norecurse
+
+define internal void @bar1() {
+; CHECK-LABEL: define internal void @bar1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @f1()
+ ret void
+}
+
+define internal void @f1() {
+; CHECK-LABEL: define internal void @f1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo()
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @foo()
+ tail call void @bar2()
+ tail call void @baz()
+ ret void
+}
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @f1()
+ ret i32 0
+}
+
+define internal void @foo1() {
+; CHECK-LABEL: define internal void @foo1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar1()
+ ret void
+}
+
+define internal void @bar() {
+; CHECK-LABEL: define internal void @bar() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @foo1()
+ ret void
+}
+
+define internal void @foo() {
+; CHECK-LABEL: define internal void @foo() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar()
+ ret void
+}
+
+define internal void @bar4() {
+; CHECK-LABEL: define internal void @bar4() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+define internal void @bar2() {
+; CHECK-LABEL: define internal void @bar2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+define internal void @bar3() {
+; CHECK-LABEL: define internal void @bar3() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+define internal void @fun() {
+; CHECK-LABEL: define internal void @fun() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+define internal void @baz() {
+; CHECK-LABEL: define internal void @baz() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll
new file mode 100644
index 0000000..cd94037
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is added to a function which calls a function which
+; is indirectly recursive but is not part of the recursive chain.
+; There are two SCCs in this test:
+; SCC#1: bar2, bar3, bar4
+; SCC#2: baz, fun
+; f1() calls bar2 and baz, both of which are part of some indirect recursive
+; chain. but does not call back f1() and hence f1() can be marked as
+; norecurse.
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @f1()
+ ret i32 0
+}
+
+define internal void @f1() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @f1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ tail call void @baz()
+ ret void
+}
+
+define internal void @bar4() {
+; CHECK-LABEL: define internal void @bar4() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+define internal void @bar2() {
+; CHECK-LABEL: define internal void @bar2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+define internal void @bar3() {
+; CHECK-LABEL: define internal void @bar3() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+define internal void @fun() {
+; CHECK-LABEL: define internal void @fun() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+define internal void @baz() {
+; CHECK-LABEL: define internal void @baz() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll
new file mode 100644
index 0000000..8b81a90
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -passes=norecurse-lto-inference -S %s | FileCheck %s
+
+; This is a negative test which results in RefSCC with size > 1.
+; RefSCC : [(f2), (f1)]
+; --- SCC A (f1) --- size() = 1
+define internal void @f1() {
+; CHECK-LABEL: define internal void @f1() {
+; CHECK-NEXT: call void @f2()
+; CHECK-NEXT: ret void
+;
+ call void @f2()
+ ret void
+}
+
+; --- SCC B (f2) --- size() = 1
+; f2 indirectly calls f1 using locally allocated function pointer
+define internal void @f2() {
+; CHECK-LABEL: define internal void @f2() {
+; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: store ptr @f1, ptr [[FP]], align 8
+; CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr [[FP]], align 8
+; CHECK-NEXT: call void [[TMP]]()
+; CHECK-NEXT: ret void
+;
+ %fp = alloca void ()*
+ store void ()* @f1, void ()** %fp
+ %tmp = load void ()*, void ()** %fp
+ call void %tmp()
+ ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT: call void @f1()
+; CHECK-NEXT: ret i32 0
+;
+ call void @f1()
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
new file mode 100644
index 0000000..461e5df
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s
+
+; This test includes a call graph with a self recursive function.
+; The purpose of this is to check that norecurse is added to functions
+; which have a self-recursive function in the call-chain.
+; The call-chain in this test is as follows
+; main -> bob -> callee1 -> callee2
+; where callee2 is self recursive.
+
+@x = dso_local global i32 4, align 4
+@y = dso_local global i32 2, align 4
+
+;.
+; CHECK: @x = dso_local global i32 4, align 4
+; CHECK: @y = dso_local global i32 2, align 4
+;.
+define internal void @callee2() {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define internal void @callee2(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @y, align 4
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store volatile i32 [[INC]], ptr @y, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @y, align 4
+ %inc = add nsw i32 %0, 1
+ store volatile i32 %inc, ptr @y, align 4
+ ret void
+}
+
+define internal void @callee1(i32 %x) {
+; CHECK-LABEL: define internal void @callee1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: tail call void @callee1(i32 [[X]])
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: tail call void @callee2()
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @callee1(i32 %x)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @callee2()
+ ret void
+}
+
+define internal void @bob() {
+; CHECK-LABEL: define internal void @bob() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @x, align 4
+; CHECK-NEXT: tail call void @callee2(i32 [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @x, align 4
+ tail call void @callee2(i32 %0)
+ ret void
+}
+
+define dso_local i32 @main() norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define dso_local i32 @main(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bob()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bob()
+ ret i32 0
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse }
+;.
diff --git a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
index 9de9150..8b0a5ca 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; TODO: All of these should be optimized to less than or equal to a single
@@ -7,13 +7,13 @@
; --- (A op B) op' A / (B op A) op' A ---
; (A land B) land A
-define i1 @land_land_left1(i1 %A, i1 %B) {
+define i1 @land_land_left1(i1 %A, i1 %B) !prof !0 {
; CHECK-LABEL: @land_land_left1(
-; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i1 [[C]]
;
- %c = select i1 %A, i1 %B, i1 false
- %res = select i1 %c, i1 %A, i1 false
+ %c = select i1 %A, i1 %B, i1 false, !prof !1
+ %res = select i1 %c, i1 %A, i1 false, !prof !2
ret i1 %res
}
define i1 @land_land_left2(i1 %A, i1 %B) {
@@ -157,13 +157,13 @@ define i1 @lor_band_left2(i1 %A, i1 %B) {
}
; (A lor B) lor A
-define i1 @lor_lor_left1(i1 %A, i1 %B) {
+define i1 @lor_lor_left1(i1 %A, i1 %B) !prof !0 {
; CHECK-LABEL: @lor_lor_left1(
-; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]], !prof [[PROF1]]
; CHECK-NEXT: ret i1 [[C]]
;
- %c = select i1 %A, i1 true, i1 %B
- %res = select i1 %c, i1 true, i1 %A
+ %c = select i1 %A, i1 true, i1 %B, !prof !1
+ %res = select i1 %c, i1 true, i1 %A, !prof !2
ret i1 %res
}
define i1 @lor_lor_left2(i1 %A, i1 %B) {
@@ -506,3 +506,12 @@ define <2 x i1> @PR50500_falseval(<2 x i1> %a, <2 x i1> %b) {
%r = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %s
ret <2 x i1> %r
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 5, i32 7}
+
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll
new file mode 100644
index 0000000..22696d0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -prefer-inloop-reductions -mcpu=apple-m1 -force-vector-interleave=1 -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+define i32 @mul_used_outside_vpexpression(ptr %src.0, ptr %src.1) {
+; CHECK-LABEL: define i32 @mul_used_outside_vpexpression(
+; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP6]] = add i32 [[VEC_PHI]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP8]] = or i32 [[VEC_PHI1]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 96
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP8]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 100
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ [[BC_MERGE_RDX2]], %[[VEC_EPILOG_PH]] ], [ [[TMP19:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 [[INDEX3]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[NEXT_GEP6]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[WIDE_LOAD7]] to <4 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT9]] to <4 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = mul <4 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT: [[TMP17]] = add i32 [[VEC_PHI4]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT: [[TMP19]] = or i32 [[VEC_PHI5]], [[TMP18]]
+; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX3]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 100
+; CHECK-NEXT: br i1 [[TMP20]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC_0]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i32 [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[GEP_0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED_0:%.*]] = phi i32 [ [[BC_MERGE_RDX12]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_0_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED_1:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_0]] = getelementptr i8, ptr [[PTR_IV]], i64 1
+; CHECK-NEXT: [[L_0:%.*]] = load i8, ptr [[PTR_IV]], align 1
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_1]], align 1
+; CHECK-NEXT: [[L_0_EXT:%.*]] = zext i8 [[L_0]] to i32
+; CHECK-NEXT: [[L_1_EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT: [[MUL_EXT_LL:%.*]] = mul i32 [[L_0_EXT]], [[L_1_EXT]]
+; CHECK-NEXT: [[RED_1_NEXT]] = or i32 [[MUL_EXT_LL]], [[RED_1]]
+; CHECK-NEXT: [[RED_0_NEXT]] = add i32 [[MUL_EXT_LL]], [[RED_0]]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 101
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RED_1_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_1_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RED_0_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_0_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[RED_1_NEXT_LCSSA]], [[RED_0_NEXT_LCSSA]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %ptr.iv = phi ptr [ %src.0, %entry ], [ %gep.0, %loop ]
+ %red.0 = phi i32 [ 0, %entry ], [ %red.0.next, %loop ]
+ %red.1 = phi i32 [ 0, %entry ], [ %red.1.next, %loop ]
+ %gep.0 = getelementptr i8, ptr %ptr.iv, i64 1
+ %l.0 = load i8, ptr %ptr.iv, align 1
+ %gep.1 = getelementptr i8, ptr %src.1, i64 1
+ %l.1 = load i8, ptr %gep.1, align 1
+ %l.0.ext = zext i8 %l.0 to i32
+ %l.1.ext = zext i8 %l.1 to i32
+ %mul.ext.ll = mul i32 %l.0.ext, %l.1.ext
+ %red.1.next = or i32 %mul.ext.ll, %red.1
+ %red.0.next = add i32 %mul.ext.ll, %red.0
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, 101
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ %res = add i32 %red.1.next, %red.0.next
+ ret i32 %res
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll
new file mode 100644
index 0000000..6095b24
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NO-PARTIAL-REDUCTION
+
+target triple = "aarch64"
+
+define i128 @add_reduc_i32_i128_unsupported(ptr %a, ptr %b) "target-features"="+dotprod" {
+; CHECK-NO-PARTIAL-REDUCTION-LABEL: define i128 @add_reduc_i32_i128_unsupported(
+; CHECK-NO-PARTIAL-REDUCTION-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ENTRY:.*:]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_PH]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_BODY]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[VEC_PHI:%.*]] = phi <4 x i128> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[WIDE_LOAD1]] to <4 x i64>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP4:%.*]] = mul nuw <4 x i64> [[TMP1]], [[TMP3]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP5:%.*]] = zext <4 x i64> [[TMP4]] to <4 x i128>
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP7]] = add <4 x i128> [[VEC_PHI]], [[TMP5]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4024
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NO-PARTIAL-REDUCTION: [[MIDDLE_BLOCK]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP8:%.*]] = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> [[TMP7]])
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[SCALAR_PH]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-NO-PARTIAL-REDUCTION: [[FOR_BODY]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV:%.*]] = phi i64 [ 4024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ACCUM:%.*]] = phi i128 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[GEP_A]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_A:%.*]] = zext i32 [[LOAD_A]] to i64
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_B:%.*]] = zext i32 [[LOAD_B]] to i64
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL:%.*]] = mul nuw i64 [[EXT_A]], [[EXT_B]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL_ZEXT:%.*]] = zext i64 [[MUL]] to i128
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD]] = add i128 [[ACCUM]], [[MUL_ZEXT]]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 4025
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NO-PARTIAL-REDUCTION: [[FOR_EXIT]]:
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD_LCSSA:%.*]] = phi i128 [ [[ADD]], %[[FOR_BODY]] ]
+; CHECK-NO-PARTIAL-REDUCTION-NEXT: ret i128 [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %accum = phi i128 [ 0, %entry ], [ %add, %for.body ]
+ %gep.a = getelementptr i32, ptr %a, i64 %iv
+ %load.a = load i32, ptr %gep.a, align 1
+ %ext.a = zext i32 %load.a to i64
+ %gep.b = getelementptr i32, ptr %b, i64 %iv
+ %load.b = load i32, ptr %gep.b, align 1
+ %ext.b = zext i32 %load.b to i64
+ %mul = mul nuw i64 %ext.a, %ext.b
+ %mul.zext = zext i64 %mul to i128
+ %add = add i128 %accum, %mul.zext
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 4025
+ br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit:
+ ret i128 %add
+}
+;.
+; CHECK-NO-PARTIAL-REDUCTION: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-NO-PARTIAL-REDUCTION: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NO-PARTIAL-REDUCTION: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-NO-PARTIAL-REDUCTION: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll
new file mode 100644
index 0000000..fd83a01
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "armv7-unknown-linux-gnueabihf"
+
+define void @replicating_load_used_by_other_load(i32 %arg, ptr %a, i32 %b) {
+; CHECK-LABEL: define void @replicating_load_used_by_other_load(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[ARG]], %[[ENTRY]] ]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[IV]], 1
+; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[IV]], 1
+; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 [[IV]], 2
+; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 [[IV]], 1
+; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], 2
+; CHECK-NEXT: [[OR_1:%.*]] = or i32 [[AND_2]], [[AND_1]]
+; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[OR_1]], [[SHL_1]]
+; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[B]], [[OR_2]]
+; CHECK-NEXT: [[XOR_2:%.*]] = xor i32 [[XOR_1]], [[ARG]]
+; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[SHL_1]], 1
+; CHECK-NEXT: [[XOR_3:%.*]] = xor i32 [[SHR]], [[ARG]]
+; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[XOR_3]], 1
+; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[IV]], 2147483646
+; CHECK-NEXT: [[OR_3:%.*]] = or i32 [[AND_3]], [[AND_4]]
+; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[IV]], 254
+; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 [[OR_3]], 1
+; CHECK-NEXT: [[XOR_4:%.*]] = xor i32 [[SHL_3]], 2
+; CHECK-NEXT: [[OR_4:%.*]] = or i32 [[AND_5]], [[XOR_4]]
+; CHECK-NEXT: [[XOR_5:%.*]] = xor i32 [[SHR_2]], [[OR_4]]
+; CHECK-NEXT: [[XOR_6:%.*]] = xor i32 [[XOR_5]], [[XOR_2]]
+; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[XOR_6]], 255
+; CHECK-NEXT: [[XOR_7:%.*]] = xor i32 [[AND_6]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[XOR_7]]
+; CHECK-NEXT: [[LD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD]] to i32
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i32, ptr null, i32 [[ZEXT]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_2]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %iv.next, %loop ], [ %arg, %entry ]
+ %shr = lshr i32 %iv, 1
+ %and.1 = and i32 %iv, 1
+ %shl.1 = shl i32 %iv, 2
+ %shl.2 = shl i32 %iv, 1
+ %and.2 = and i32 %shl.2, 2
+ %or.1 = or i32 %and.2, %and.1
+ %or.2 = or i32 %or.1, %shl.1
+ %xor.1 = xor i32 %b, %or.2
+ %xor.2 = xor i32 %xor.1, %arg
+ %shr.2 = lshr i32 %shl.1, 1
+ %xor.3 = xor i32 %shr, %arg
+ %and.3 = and i32 %xor.3, 1
+ %and.4 = and i32 %iv, 2147483646
+ %or.3 = or i32 %and.3, %and.4
+ %and.5 = and i32 %iv, 254
+ %shl.3 = shl i32 %or.3, 1
+ %xor.4 = xor i32 %shl.3, 2
+ %or.4 = or i32 %and.5, %xor.4
+ %xor.5 = xor i32 %shr.2, %or.4
+ %xor.6 = xor i32 %xor.5, %xor.2
+ %and.6 = and i32 %xor.6, 255
+ %xor.7 = xor i32 %and.6, 1
+ %gep = getelementptr i8, ptr %a, i32 %xor.7
+ %ld = load i8, ptr %gep, align 1
+ %zext = zext i8 %ld to i32
+ %gep.2 = getelementptr i32, ptr null, i32 %zext
+ store i32 0, ptr %gep.2, align 4
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp eq i32 %iv.next, 100
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 8784873..f5329cf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -454,6 +454,132 @@ exit:
ret void
}
+declare i1 @cond()
+
+define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %ptr.c) {
+; I64-LABEL: define double @test_load_used_by_other_load_scev(
+; I64-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I64-NEXT: [[ENTRY:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP:.*]]
+; I64: [[OUTER_LOOP_LOOPEXIT:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP]]
+; I64: [[OUTER_LOOP]]:
+; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ]
+; I64-NEXT: [[COND:%.*]] = call i1 @cond()
+; I64-NEXT: br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
+; I64: [[INNER_LOOP_PREHEADER]]:
+; I64-NEXT: br label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[TMP0:%.*]] = add i64 0, 1
+; I64-NEXT: [[TMP1:%.*]] = add i64 1, 1
+; I64-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]]
+; I64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]]
+; I64-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]]
+; I64-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]]
+; I64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+; I64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]]
+; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]]
+; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8
+; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer
+; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
+; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
+; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8
+; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8
+; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1
+; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer
+; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2>
+; I64-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer
+; I64-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer
+; I64-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00)
+; I64-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8
+; I64-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8
+; I64-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
+; I64-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1
+; I64-NEXT: [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]]
+; I64-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]]
+; I64-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1
+; I64-NEXT: br label %[[OUTER_LOOP_LOOPEXIT]]
+; I64: [[EXIT]]:
+; I64-NEXT: ret double [[ACCUM]]
+;
+; I32-LABEL: define double @test_load_used_by_other_load_scev(
+; I32-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I32-NEXT: [[ENTRY:.*]]:
+; I32-NEXT: br label %[[OUTER_LOOP:.*]]
+; I32: [[OUTER_LOOP]]:
+; I32-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ]
+; I32-NEXT: [[COND:%.*]] = call i1 @cond()
+; I32-NEXT: br i1 [[COND]], label %[[INNER_LOOP]], label %[[EXIT:.*]]
+; I32: [[INNER_LOOP]]:
+; I32-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[OUTER_LOOP]] ], [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[ACCUM_INNER:%.*]] = phi double [ [[ACCUM]], %[[OUTER_LOOP]] ], [ [[MUL1:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[IDX_PLUS1:%.*]] = add i64 [[IV]], 1
+; I32-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[GEP_A_I64:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[LOAD_IDX:%.*]] = load i64, ptr [[GEP_A_I64]], align 8
+; I32-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[LOAD_IDX]]
+; I32-NEXT: [[LOAD_A:%.*]] = load double, ptr [[PTR_A]], align 8
+; I32-NEXT: [[ADD1:%.*]] = fadd double [[LOAD_A]], 0.000000e+00
+; I32-NEXT: [[GEP_C_OFFSET:%.*]] = getelementptr i8, ptr [[GEP_C]], i64 8
+; I32-NEXT: [[LOAD_C:%.*]] = load double, ptr [[GEP_C_OFFSET]], align 8
+; I32-NEXT: [[MUL1]] = fmul double [[ADD1]], 0.000000e+00
+; I32-NEXT: [[MUL2:%.*]] = fmul double [[LOAD_C]], 0.000000e+00
+; I32-NEXT: [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00
+; I32-NEXT: [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00
+; I32-NEXT: [[LOAD_B:%.*]] = load double, ptr [[GEP_B]], align 8
+; I32-NEXT: [[DIV:%.*]] = fdiv double [[LOAD_B]], [[ADD3]]
+; I32-NEXT: [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]]
+; I32-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; I32-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; I32-NEXT: br i1 [[EXITCOND]], label %[[OUTER_LOOP]], label %[[INNER_LOOP]]
+; I32: [[EXIT]]:
+; I32-NEXT: ret double [[ACCUM]]
+;
+entry:
+ br label %outer.loop
+
+outer.loop:
+ %accum = phi double [ 0.0, %entry ], [ %result, %inner.loop ]
+ %cond = call i1 @cond()
+ br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+ %iv = phi i64 [ 0, %outer.loop ], [ %iv.next, %inner.loop ]
+ %accum.inner = phi double [ %accum, %outer.loop ], [ %mul1, %inner.loop ]
+ %idx.plus1 = add i64 %iv, 1
+ %gep.c = getelementptr i8, ptr %ptr.c, i64 %idx.plus1
+ %gep.a.i64 = getelementptr i64, ptr %ptr.a, i64 %idx.plus1
+ %load.idx = load i64, ptr %gep.a.i64, align 8
+ %gep.b = getelementptr double, ptr %ptr.b, i64 %load.idx
+ %load.a = load double, ptr %ptr.a, align 8
+ %add1 = fadd double %load.a, 0.000000e+00
+ %gep.c.offset = getelementptr i8, ptr %gep.c, i64 8
+ %load.c = load double, ptr %gep.c.offset, align 8
+ %mul1 = fmul double %add1, 0.000000e+00
+ %mul2 = fmul double %load.c, 0.000000e+00
+ %add2 = fadd double %mul2, 0.000000e+00
+ %add3 = fadd double %add2, 1.000000e+00
+ %load.b = load double, ptr %gep.b, align 8
+ %div = fdiv double %load.b, %add3
+ %result = fsub double %accum.inner, %div
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv, 1
+ br i1 %exitcond, label %outer.loop, label %inner.loop
+
+exit:
+ ret double %accum
+}
+
attributes #0 = { "target-cpu"="znver2" }
!0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/SCCP/relax-range-checks.ll b/llvm/test/Transforms/SCCP/relax-range-checks.ll
index 90722f3..34e4813 100644
--- a/llvm/test/Transforms/SCCP/relax-range-checks.ll
+++ b/llvm/test/Transforms/SCCP/relax-range-checks.ll
@@ -89,4 +89,28 @@ define i1 @relax_range_check_multiuse(i8 range(i8 0, 5) %x) {
ret i1 %ret
}
+define i1 @range_check_to_icmp_eq1(i32 range(i32 0, 4) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_eq1(
+; CHECK-SAME: i32 range(i32 0, 4) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i32 [[X]], -3
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %off = add nsw i32 %x, -3
+ %cmp = icmp ult i32 %off, 2
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_eq2(i32 range(i32 -1, 2) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_eq2(
+; CHECK-SAME: i32 range(i32 -1, 2) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i32 %x, -1
+ %cmp = icmp ult i32 %off, -2
+ ret i1 %cmp
+}
+
declare void @use(i8)
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
index 655db54..a079203 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
@@ -10,14 +10,10 @@ define void @test() {
; CHECK-NEXT: [[SUB4_I_I65_US:%.*]] = or i64 0, 1
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
-; CHECK-NEXT: [[ADD_I_I62_US:%.*]] = shl i64 0, 0
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> <i64 poison, i64 1>, i64 [[ADD_I_I62_US]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[CLASS_A:%.*]], <2 x ptr> zeroinitializer, <2 x i64> [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> splat (i1 true), <2 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-; CHECK-NEXT: [[CMP_I_I_I_I67_US:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> getelementptr ([[CLASS_A:%.*]], <2 x ptr> zeroinitializer, <2 x i64> <i64 0, i64 1>), i32 4, <2 x i1> splat (i1 true), <2 x i32> poison)
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT: [[CMP_I_I_I_I67_US:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[SPEC_SELECT_I_I68_US:%.*]] = select i1 false, i64 [[SUB4_I_I65_US]], i64 0
; CHECK-NEXT: br label [[BODY]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll
index 7758596..87f2cca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll
@@ -8,8 +8,8 @@ define i32 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: store i32 152, ptr @f, align 4
; CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD_I:%.*]] = load i32, ptr @f, align 4
-; CHECK-NEXT: [[ADD_I_I:%.*]] = shl i32 [[AGG_TMP_SROA_0_0_COPYLOAD_I]], 24
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 poison, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080>, i32 [[ADD_I_I]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> <i32 poison, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080, i32 83886080>, i32 [[AGG_TMP_SROA_0_0_COPYLOAD_I]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i32> [[TMP3]], <i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> <i32 83886080, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 24)
; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[TMP2]], <i32 66440127, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 75aec45..3e0a374 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -247,32 +247,12 @@ entry:
}
define void @shl0(ptr noalias %dst, ptr noalias %src) {
-; NON-POW2-LABEL: @shl0(
-; NON-POW2-NEXT: entry:
-; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; NON-POW2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; NON-POW2-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
-; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
-; NON-POW2-NEXT: [[TMP2:%.*]] = shl <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
-; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
-; NON-POW2-NEXT: ret void
-;
-; POW2-ONLY-LABEL: @shl0(
-; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; POW2-ONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; POW2-ONLY-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
-; POW2-ONLY-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
-; POW2-ONLY-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
-; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
-; POW2-ONLY-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 2>
-; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
-; POW2-ONLY-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
-; POW2-ONLY-NEXT: [[SHL8:%.*]] = shl i32 [[TMP3]], 3
-; POW2-ONLY-NEXT: store i32 [[SHL8]], ptr [[INCDEC_PTR6]], align 4
-; POW2-ONLY-NEXT: ret void
+; CHECK-LABEL: @shl0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
+; CHECK-NEXT: ret void
;
entry:
%incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll b/llvm/test/Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll
index a5b1e9b..769b360 100644
--- a/llvm/test/Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/bool-logical-op-reduction-with-poison.ll
@@ -1,25 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefix=AARCH64 %}
define i1 @test(i32 %0, i32 %1, i32 %p) {
-; CHECK-LABEL: define i1 @test(
-; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[P:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i32> zeroinitializer, [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[CMP6:%.*]] = icmp slt i32 0, [[P]]
-; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 true, i1 [[CMP6]]
-; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[OP_RDX]]
-; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP8]], i1 true, i1 [[OP_RDX1]]
-; CHECK-NEXT: ret i1 [[OP_RDX2]]
+; X86-LABEL: define i1 @test(
+; X86-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[P:%.*]]) {
+; X86-NEXT: entry:
+; X86-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; X86-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; X86-NEXT: [[TMP4:%.*]] = shl <4 x i32> zeroinitializer, [[TMP3]]
+; X86-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
+; X86-NEXT: [[CMP6:%.*]] = icmp slt i32 0, [[P]]
+; X86-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; X86-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; X86-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 true, i1 [[CMP6]]
+; X86-NEXT: [[OP_RDX1:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP1]]
+; X86-NEXT: [[TMP8:%.*]] = freeze i1 [[OP_RDX]]
+; X86-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP8]], i1 true, i1 [[OP_RDX1]]
+; X86-NEXT: ret i1 [[OP_RDX2]]
+;
+; AARCH64-LABEL: define i1 @test(
+; AARCH64-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[P:%.*]]) {
+; AARCH64-NEXT: entry:
+; AARCH64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; AARCH64-NEXT: [[SHL4:%.*]] = shl i32 0, [[TMP1]]
+; AARCH64-NEXT: [[CMP5:%.*]] = icmp slt i32 [[SHL4]], 0
+; AARCH64-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP1]], i32 1
+; AARCH64-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; AARCH64-NEXT: [[TMP4:%.*]] = shl <4 x i32> zeroinitializer, [[TMP3]]
+; AARCH64-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[P]], i32 0
+; AARCH64-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[TMP4]], [[TMP5]]
+; AARCH64-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
+; AARCH64-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; AARCH64-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP8]], i1 true, i1 [[CMP5]]
+; AARCH64-NEXT: [[OP_RDX1:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP1]]
+; AARCH64-NEXT: [[TMP9:%.*]] = freeze i1 [[OP_RDX]]
+; AARCH64-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 true, i1 [[OP_RDX1]]
+; AARCH64-NEXT: ret i1 [[OP_RDX2]]
;
entry:
%cmp1 = icmp sgt i32 %0, 0
diff --git a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
index 87d8b39..2fa36b0 100644
--- a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s
; SimplifyCFG should eliminate redundant indirectbr edges.
@@ -8,7 +8,11 @@ declare void @A()
declare void @B(i32)
declare void @C()
-define void @indbrtest0(ptr %P, ptr %Q) {
+;.
+; CHECK: @anchor = constant [13 x ptr] [ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@indbrtest4, %L1), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr)]
+; CHECK: @xblkx.bbs = internal unnamed_addr constant [9 x ptr] [ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j)]
+;.
+define void @indbrtest0(ptr %P, ptr %Q) !prof !0 {
; CHECK-LABEL: @indbrtest0(
; CHECK-NEXT: entry:
; CHECK-NEXT: store ptr blockaddress(@indbrtest0, [[BB0:%.*]]), ptr [[P:%.*]], align 8
@@ -16,7 +20,7 @@ define void @indbrtest0(ptr %P, ptr %Q) {
; CHECK-NEXT: store ptr blockaddress(@indbrtest0, [[BB2:%.*]]), ptr [[P]], align 8
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[Q:%.*]], align 8
-; CHECK-NEXT: indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2]
+; CHECK-NEXT: indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2], !prof [[PROF1:![0-9]+]]
; CHECK: BB0:
; CHECK-NEXT: call void @A()
; CHECK-NEXT: br label [[BB1]]
@@ -36,7 +40,7 @@ entry:
store ptr blockaddress(@indbrtest0, %BB2), ptr %P
call void @foo()
%t = load ptr, ptr %Q
- indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2]
+ indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2], !prof !1
BB0:
call void @A()
br label %BB1
@@ -103,10 +107,10 @@ BB0:
; SimplifyCFG should turn the indirectbr into a conditional branch on the
; condition of the select.
-define void @indbrtest3(i1 %cond, ptr %address) nounwind {
+define void @indbrtest3(i1 %cond, ptr %address) nounwind !prof !0 {
; CHECK-LABEL: @indbrtest3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]]
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: L1:
@@ -117,8 +121,8 @@ define void @indbrtest3(i1 %cond, ptr %address) nounwind {
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
- %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2)
- indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3]
+ %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), !prof !2
+ indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3], !prof !3
L1:
call void @A()
@@ -385,3 +389,15 @@ declare i32 @xfunc5x()
declare i8 @xfunc7x()
declare i32 @xselectorx()
declare i32 @xactionx()
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5, i32 7, i32 11, i32 13, i32 17}
+!2 = !{!"branch_weights", i32 3, i32 5}
+!3 = !{!"branch_weights", i32 3, i32 5, i32 7}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 14, i32 18, i32 24}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 5}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 4a457cc..a0e29dd 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -7,8 +7,7 @@ declare void @foo(i32)
define void @test(i1 %a) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i1 [[A:%.*]]) {
-; CHECK-NEXT: [[A_OFF:%.*]] = add i1 [[A]], true
-; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i1 [[A_OFF]], true
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i1 [[A]], true
; CHECK-NEXT: br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -209,8 +208,7 @@ define void @test5(i8 %a) {
; CHECK-SAME: i8 [[A:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A]], 2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[A_OFF:%.*]] = add i8 [[A]], -1
-; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i8 [[A]], 1
; CHECK-NEXT: br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -243,8 +241,7 @@ define void @test6(i8 %a) {
; CHECK-NEXT: [[AND:%.*]] = and i8 [[A]], -2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], -2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[A_OFF:%.*]] = add i8 [[A]], 1
-; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
; CHECK-NEXT: br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -279,8 +276,7 @@ define void @test7(i8 %a) {
; CHECK-NEXT: [[AND:%.*]] = and i8 [[A]], -2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], -2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT: [[A_OFF:%.*]] = add i8 [[A]], 1
-; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
; CHECK-NEXT: br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
index 8f2ae2d..0fc3c19 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
@@ -188,4 +188,217 @@ exit:
ret void
}
+define i32 @wrapping_known_range(i8 range(i8 0, 6) %arg) {
+; CHECK-LABEL: @wrapping_known_range(
+; CHECK-NEXT: [[ARG_OFF:%.*]] = add i8 [[ARG:%.*]], -1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[ARG_OFF]], 3
+; CHECK-NEXT: br i1 [[SWITCH]], label [[ELSE:%.*]], label [[IF:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[I0:%.*]], [[IF]] ], [ [[I1:%.*]], [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if:
+; CHECK-NEXT: [[I0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[I1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+ switch i8 %arg, label %else [
+ i8 0, label %if
+ i8 4, label %if
+ i8 5, label %if
+ ]
+
+if:
+ %i0 = call i32 @f(i32 0)
+ ret i32 %i0
+
+else:
+ %i1 = call i32 @f(i32 1)
+ ret i32 %i1
+}
+
+define i32 @wrapping_known_range_2(i8 range(i8 0, 6) %arg) {
+; CHECK-LABEL: @wrapping_known_range_2(
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i8 [[ARG:%.*]], 1
+; CHECK-NEXT: br i1 [[SWITCH]], label [[ELSE:%.*]], label [[IF:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[I0:%.*]], [[IF]] ], [ [[I1:%.*]], [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if:
+; CHECK-NEXT: [[I0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[I1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+ switch i8 %arg, label %else [
+ i8 0, label %if
+ i8 2, label %if
+ i8 3, label %if
+ i8 4, label %if
+ i8 5, label %if
+ ]
+
+if:
+ %i0 = call i32 @f(i32 0)
+ ret i32 %i0
+
+else:
+ %i1 = call i32 @f(i32 1)
+ ret i32 %i1
+}
+
+define i32 @wrapping_range(i8 %arg) {
+; CHECK-LABEL: @wrapping_range(
+; CHECK-NEXT: [[ARG_OFF:%.*]] = add i8 [[ARG:%.*]], -1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[ARG_OFF]], -4
+; CHECK-NEXT: br i1 [[SWITCH]], label [[ELSE:%.*]], label [[IF:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[I0:%.*]], [[IF]] ], [ [[I1:%.*]], [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if:
+; CHECK-NEXT: [[I0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[I1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+ switch i8 %arg, label %else [
+ i8 0, label %if
+ i8 -3, label %if
+ i8 -2, label %if
+ i8 -1, label %if
+ ]
+
+if:
+ %i0 = call i32 @f(i32 0)
+ ret i32 %i0
+
+else:
+ %i1 = call i32 @f(i32 1)
+ ret i32 %i1
+}
+
+define i8 @wrapping_range_phi(i8 %arg) {
+; CHECK-LABEL: @wrapping_range_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARG_OFF:%.*]] = add i8 [[ARG:%.*]], -1
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[ARG_OFF]], -2
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i8 0, i8 1
+; CHECK-NEXT: ret i8 [[SPEC_SELECT]]
+;
+entry:
+ switch i8 %arg, label %else [
+ i8 0, label %if
+ i8 -1, label %if
+ ]
+
+if:
+ %i = phi i8 [ 0, %else ], [ 1, %entry ], [ 1, %entry ]
+ ret i8 %i
+
+else:
+ br label %if
+}
+
+define i32 @no_continuous_wrapping_range(i8 %arg) {
+; CHECK-LABEL: @no_continuous_wrapping_range(
+; CHECK-NEXT: switch i8 [[ARG:%.*]], label [[ELSE:%.*]] [
+; CHECK-NEXT: i8 0, label [[IF:%.*]]
+; CHECK-NEXT: i8 -3, label [[IF]]
+; CHECK-NEXT: i8 -1, label [[IF]]
+; CHECK-NEXT: ]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[I0:%.*]], [[IF]] ], [ [[I1:%.*]], [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if:
+; CHECK-NEXT: [[I0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[I1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+ switch i8 %arg, label %else [
+ i8 0, label %if
+ i8 -3, label %if
+ i8 -1, label %if
+ ]
+
+if:
+ %i0 = call i32 @f(i32 0)
+ ret i32 %i0
+
+else:
+ %i1 = call i32 @f(i32 1)
+ ret i32 %i1
+}
+
+define i32 @one_case_1(i32 %x) {
+; CHECK-LABEL: @one_case_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i32 [[X:%.*]], 10
+; CHECK-NEXT: br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[TMP0:%.*]], [[B]] ], [ [[TMP1:%.*]], [[A]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: a:
+; CHECK-NEXT: [[TMP0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: b:
+; CHECK-NEXT: [[TMP1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ switch i32 %x, label %unreachable [
+ i32 5, label %a
+ i32 6, label %a
+ i32 7, label %a
+ i32 10, label %b
+ ]
+
+unreachable:
+ unreachable
+a:
+ %0 = call i32 @f(i32 0)
+ ret i32 %0
+b:
+ %1 = call i32 @f(i32 1)
+ ret i32 %1
+}
+
+define i32 @one_case_2(i32 %x) {
+; CHECK-LABEL: @one_case_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH:%.*]] = icmp eq i32 [[X:%.*]], 5
+; CHECK-NEXT: br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[TMP0:%.*]], [[A]] ], [ [[TMP1:%.*]], [[B]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: a:
+; CHECK-NEXT: [[TMP0]] = call i32 @f(i32 0)
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: b:
+; CHECK-NEXT: [[TMP1]] = call i32 @f(i32 1)
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ switch i32 %x, label %unreachable [
+ i32 5, label %a
+ i32 10, label %b
+ i32 11, label %b
+ i32 12, label %b
+ i32 13, label %b
+ ]
+
+unreachable:
+ unreachable
+a:
+ %0 = call i32 @f(i32 0)
+ ret i32 %0
+b:
+ %1 = call i32 @f(i32 1)
+ ret i32 %1
+}
+
declare void @bar(ptr nonnull dereferenceable(4))