diff options
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- | llvm/test/Transforms/Coroutines/coro-elide-safe.ll (renamed from llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll) | 28 | ||||
-rw-r--r-- | llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll | 32 | ||||
-rw-r--r-- | llvm/test/Transforms/DFAJumpThreading/max-path-length.ll | 6 | ||||
-rw-r--r-- | llvm/test/Transforms/GVN/assume-equal.ll | 49 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/ptrtoaddr.ll | 77 | ||||
-rw-r--r-- | llvm/test/Transforms/InstSimplify/ptr_diff.ll | 59 | ||||
-rw-r--r-- | llvm/test/Transforms/LICM/vector-intrinsics.ll | 176 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/single_early_exit.ll | 47 | ||||
-rw-r--r-- | llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll | 1 |
10 files changed, 401 insertions, 76 deletions
diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll index 4eec7ed..722693d 100644 --- a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-safe.ll @@ -1,4 +1,8 @@ -; Testing elide performed its job for calls to coroutines marked safe. +; Coroutine calls marked with `coro_elide_safe` should be elided. +; Inside `caller`, we expect the `callee` coroutine to be elided. +; Inside `caller_conditional`, `callee` is only called on an unlikely +; path, hence we expect the `callee` coroutine NOT to be elided. +; ; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s %struct.Task = type { ptr } @@ -57,7 +61,7 @@ define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) { ; Function Attrs: presplitcoroutine define ptr @caller() #0 { entry: - %task = call ptr @callee(i8 0) #1 + %task = call ptr @callee(i8 0) coro_elide_safe ret ptr %task ; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8 ; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8 @@ -69,6 +73,25 @@ entry: ; CHECK-NEXT: ret ptr %[[TASK]] } +; CHECK-LABEL: define ptr @caller_conditional(i1 %cond) +; Function Attrs: presplitcoroutine +define ptr @caller_conditional(i1 %cond) #0 { +entry: + br i1 %cond, label %call, label %ret + +call: + ; CHECK-NOT: alloca + ; CHECK-NOT: @llvm.coro.id({{.*}}, ptr @callee, {{.*}}) + ; CHECK: %task = call ptr @callee(i8 0) + ; CHECK-NEXT: br label %ret + %task = call ptr @callee(i8 0) coro_elide_safe + br label %ret + +ret: + %retval = phi ptr [ %task, %call ], [ null, %entry ] + ret ptr %retval +} + declare token @llvm.coro.id(i32, ptr, ptr, ptr) declare ptr @llvm.coro.begin(token, ptr) declare ptr @llvm.coro.frame() @@ -76,4 +99,3 @@ declare ptr @llvm.coro.subfn.addr(ptr, i8) declare i1 @llvm.coro.alloc(token) attributes #0 = { presplitcoroutine } -attributes #1 = { coro_elide_safe } diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 4173c32..f45798b 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -7,10 +7,10 @@ ; state, and the block that determines the next state. ; < path of BBs that form a cycle > [ state, determinator ] define i32 @test1(i32 %num) !prof !0{ -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case1, for.inc, for.body > [ 2, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] entry: br label %for.body @@ -47,12 +47,12 @@ for.end: ; complicated CFG. Here the FSM is represented as a nested loop, with ; fallthrough cases. define i32 @test2(i32 %init) { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 %sel = select i1 %cmp, i32 0, i32 2 @@ -187,12 +187,12 @@ bb66: ; preds = %bb59 ; Value %init is not predictable but it's okay since it is the value initial to the switch. define i32 @initial.value.positive1(i32 %init) !prof !0 { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 br label %loop.1 diff --git a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll index 92747629..cb7c46e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll +++ b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll @@ -9,9 +9,9 @@ ; too long so that it is not jump-threaded. define i32 @max_path_length(i32 %num) { ; CHECK-NOT: 3, case1 -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] ; CHECK-NEXT: DFA-JT: Renaming non-local uses of: entry: br label %for.body diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index 0c922da..bbbc5c5 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -221,21 +221,22 @@ define i32 @_Z1ii(i32 %p) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P]], 42 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB2]] -; CHECK: [[BB2]]: -; CHECK-NEXT: br i1 true, label %[[BB2]], label %[[BB2]] -; CHECK: [[BB0:.*:]] +; CHECK-NEXT: br i1 true, label %[[COMMON:.*]], label %[[COMMON]] +; CHECK: [[COMMON]]: +; CHECK-NEXT: br i1 true, label %[[COMMON]], label %[[COMMON]] +; CHECK: [[EXIT:.*:]] ; CHECK-NEXT: ret i32 42 ; entry: %cmp = icmp eq i32 %p, 42 call void @llvm.assume(i1 %cmp) - br i1 %cmp, label %bb2, label %bb2 -bb2: + br i1 %cmp, label %common, label %common +common: call void @llvm.assume(i1 true) - br i1 %cmp, label %bb2, label %bb2 + br i1 %cmp, label %common, label %common +exit: ret i32 %p } @@ -357,8 +358,8 @@ define i8 @assume_ptr_eq_different_prov_matters(ptr %p, ptr %p2) { ret i8 %v } -define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { -; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter( +define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) { +; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp( ; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) @@ -371,6 +372,36 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { ret i1 %c } +; This is not correct, as it may change the provenance exposed by ptrtoint. +; We still allow it for now. +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoint ptr %p2 to i64 + ret i64 %int +} + +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoaddr ptr %p2 to i64 + ret i64 %int +} + define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) { ; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov( ; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index 61b1331..5211fbd 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -1,6 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s -target datalayout = "p1:64:64:64:32" + +; The ptrtoaddr folds are also valid for pointers that have external state. +target datalayout = "pe1:64:64:64:32" + +@g = external global i8 +@g2 = external global i8 + +@g.as1 = external addrspace(1) global i8 +@g2.as1 = external addrspace(1) global i8 define i32 @ptrtoaddr_inttoptr_arg(i32 %a) { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_arg( @@ -24,14 +32,14 @@ define i32 @ptrtoaddr_inttoptr() { define i32 @ptrtoaddr_inttoptr_diff_size1() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size1() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 -1 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) } define i32 @ptrtoaddr_inttoptr_diff_size2() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size2() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 65535 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) } @@ -52,14 +60,73 @@ define i64 @ptr2addr2_inttoptr_noas2() { define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 4294967295 ; ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) } define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 -1 ; ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) } + +define i64 @ptrtoaddr_gep_null() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_null() { +; CHECK-NEXT: ret i64 42 +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 42) to i64) +} + +define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-NEXT: ret i32 42 +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32) +} + +define i64 @ptrtoaddr_gep_sub() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_sub() { +; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64) +} + +define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-NEXT: ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32)) +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32) +} + +; Don't fold inttoptr of ptrtoaddr away. inttoptr will pick a previously +; exposed provenance, which is not necessarily that of @g (especially as +; ptrtoaddr does not expose the provenance.) +define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-LABEL: define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-NEXT: ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +; + ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +} + +define i64 @ptrtoaddr_sub_consts_unrelated() { +; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_unrelated() { +; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +; + ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +} + +define i64 @ptrtoaddr_sub_consts_offset() { +; CHECK-LABEL: define i64 @ptrtoaddr_sub_consts_offset() { +; CHECK-NEXT: ret i64 42 +; + ret i64 sub (i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 42) to i64), i64 ptrtoaddr (ptr @g to i64)) +} + +define i32 @ptrtoaddr_sub_consts_offset_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_sub_consts_offset_addrsize() { +; CHECK-NEXT: ret i32 42 +; + ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 42) to i32), i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32)) +} diff --git a/llvm/test/Transforms/InstSimplify/ptr_diff.ll b/llvm/test/Transforms/InstSimplify/ptr_diff.ll index d18b462..fdd9e8e 100644 --- a/llvm/test/Transforms/InstSimplify/ptr_diff.ll +++ b/llvm/test/Transforms/InstSimplify/ptr_diff.ll @@ -1,11 +1,9 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" -define i64 @ptrdiff1(ptr %ptr) { -; CHECK-LABEL: @ptrdiff1( -; CHECK: ret i64 42 +define i64 @ptrdiff(ptr %ptr) { +; CHECK-LABEL: @ptrdiff( +; CHECK-NEXT: ret i64 42 ; %last = getelementptr inbounds i8, ptr %ptr, i32 42 %first.int = ptrtoint ptr %ptr to i64 @@ -14,9 +12,24 @@ define i64 @ptrdiff1(ptr %ptr) { ret i64 %diff } -define i64 @ptrdiff2(ptr %ptr) { -; CHECK-LABEL: @ptrdiff2( -; CHECK: ret i64 42 +define i64 @ptrdiff_no_inbounds(ptr %ptr) { +; CHECK-LABEL: @ptrdiff_no_inbounds( +; CHECK-NEXT: [[LAST:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 42 +; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]] +; CHECK-NEXT: ret i64 [[DIFF]] +; + %last = getelementptr i8, ptr %ptr, i32 42 + %first.int = ptrtoint ptr %ptr to i64 + %last.int = ptrtoint ptr %last to i64 + %diff = sub i64 %last.int, %first.int + ret i64 %diff +} + +define i64 @ptrdiff_chain(ptr %ptr) { +; CHECK-LABEL: @ptrdiff_chain( +; CHECK-NEXT: ret i64 42 ; %first2 = getelementptr inbounds i8, ptr %ptr, i32 1 %first3 = getelementptr inbounds i8, ptr %first2, i32 2 @@ -31,26 +44,10 @@ define i64 @ptrdiff2(ptr %ptr) { ret i64 %diff } -define i64 @ptrdiff3(ptr %ptr) { -; Don't bother with non-inbounds GEPs. -; CHECK-LABEL: @ptrdiff3( -; CHECK: [[LAST:%.*]] = getelementptr i8, ptr %ptr, i32 42 -; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint ptr %ptr to i64 -; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]] -; CHECK-NEXT: ret i64 [[DIFF]] -; - %last = getelementptr i8, ptr %ptr, i32 42 - %first.int = ptrtoint ptr %ptr to i64 - %last.int = ptrtoint ptr %last to i64 - %diff = sub i64 %last.int, %first.int - ret i64 %diff -} - -define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind { ; Handle simple cases of vectors of pointers. -; CHECK-LABEL: @ptrdiff4( -; CHECK: ret <4 x i32> zeroinitializer +define <4 x i32> @ptrdiff_vectors(<4 x ptr> %arg) nounwind { +; CHECK-LABEL: @ptrdiff_vectors( +; CHECK-NEXT: ret <4 x i32> zeroinitializer ; %p1 = ptrtoint <4 x ptr> %arg to <4 x i32> %bc = bitcast <4 x ptr> %arg to <4 x ptr> @@ -63,9 +60,9 @@ define <4 x i32> @ptrdiff4(<4 x ptr> %arg) nounwind { @global = internal global %struct.ham zeroinitializer, align 4 -define i32 @ptrdiff5() nounwind { -; CHECK-LABEL: @ptrdiff5( -; CHECK: bb: +define i32 @ptrdiff_global() nounwind { +; CHECK-LABEL: @ptrdiff_global( +; CHECK-NEXT: bb: ; CHECK-NEXT: ret i32 0 ; bb: diff --git a/llvm/test/Transforms/LICM/vector-intrinsics.ll b/llvm/test/Transforms/LICM/vector-intrinsics.ll new file mode 100644 index 0000000..351773e --- /dev/null +++ b/llvm/test/Transforms/LICM/vector-intrinsics.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes='loop-mssa(licm)' -verify-memoryssa %s | FileCheck %s + +define i32 @reduce_umax(<2 x i32> %inv, i1 %c) { +; CHECK-LABEL: define i32 @reduce_umax( +; CHECK-SAME: <2 x i32> [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[REDUCE_UMAX:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[INV]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[REDUCE_UMAX]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %reduce.umax = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %inv) + %backedge.cond = icmp ult i32 %iv, %reduce.umax + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_umax(<2 x i32> %inv.l, <2 x i32> %inv.r, i1 %c) { +; CHECK-LABEL: define i32 @vp_umax( +; CHECK-SAME: <2 x i32> [[INV_L:%.*]], <2 x i32> [[INV_R:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[VP_UMAX:%.*]] = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> [[INV_L]], <2 x i32> [[INV_R]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UMAX]], i32 0 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.umax = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %inv.l, <2 x i32> %inv.r, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.umax, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_udiv(<2 x i32> %inv.q, <2 x i32> %inv.d, i1 %c) { +; CHECK-LABEL: define i32 @vp_udiv( +; CHECK-SAME: <2 x i32> [[INV_Q:%.*]], <2 x i32> [[INV_D:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_UDIV:%.*]] = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> [[INV_Q]], <2 x i32> [[INV_D]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UDIV]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.udiv = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %inv.q, <2 x i32> %inv.d, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.udiv, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_load(ptr %inv, i1 %c) { +; CHECK-LABEL: define i32 @vp_load( +; CHECK-SAME: ptr [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_LOAD:%.*]] = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr [[INV]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_LOAD]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.load = call <2 x i32> @llvm.vp.load.v2i32(ptr %inv, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.load, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_store(<2 x i32> %inv.v, ptr %inv.p, i1 %c) { +; CHECK-LABEL: define i32 @vp_store( +; CHECK-SAME: <2 x i32> [[INV_V:%.*]], ptr [[INV_P:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: call void @llvm.vp.store.v2i32.p0(<2 x i32> [[INV_V]], ptr [[INV_P]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], 10 +; CHECK-NEXT: br i1 [[BACKEDGE_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + call void @llvm.vp.store.v2i32(<2 x i32> %inv.v, ptr %inv.p, <2 x i1> splat (i1 1), i32 2) + %backedge.cond = icmp ult i32 %iv, 10 + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll index d73900d..83b494a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll @@ -2288,7 +2288,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { } ;. ; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVrNxv_acos(Sleef_acosdx_u10rvvm2)" } ; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVrNxv_acosf(Sleef_acosfx_u10rvvm2)" } ; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVrNxv_acosh(Sleef_acoshdx_u10rvvm2)" } diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 3500c5c..4fd8d17 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -546,19 +546,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) ; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1 ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ] -; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1 ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0 -; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]] ; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ] +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] @@ -609,4 +640,6 @@ exit: ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll index 9acc6d6..09f583f 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll @@ -39,5 +39,4 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |