diff options
Diffstat (limited to 'llvm/test')
25 files changed, 3231 insertions, 549 deletions
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll index 207a44d..a08f859 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll @@ -565,6 +565,138 @@ e.2: ret void } +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_multiple_predecessors(ptr %A, ptr %B, i1 %c) nosync nofree { +; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_multiple_predecessors' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %B High: (2000 + %B)) +; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %A High: (2000 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ] + br i1 %c, label %then, label %else + +then: + br label %loop.header + +else: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv + %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv + %l = load i32, ptr %gep.A, align 4 + store i32 0, ptr %gep.B, align 4 + %cntable.c.1 = icmp ult i64 %iv, 1000 + %iv.next = add nuw nsw i64 %iv, 1 + br i1 %cntable.c.1, label %b2, label %e.1 + +b2: + %uncntable.c.0 = icmp eq i32 %l, 0 + br i1 %uncntable.c.0, label %e.2, label %b3 + +b3: + %cntable.c.2 = icmp eq i64 %iv.next, 500 + br i1 %cntable.c.2, label %cleanup4, label %latch + +latch: + br label %loop.header + +cleanup4: + ret void + +e.1: + ret void + +e.2: + ret void +} + +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_multiple_predecessors_no_valid(ptr %A, ptr %B, i1 %c) nosync nofree { +; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption_multiple_predecessors_no_valid' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %A High: (2000 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ] + br i1 %c, label %then, label %else + +then: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %B, i64 2000) ] + br label %loop.header + +else: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv + %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv + %l = load i32, ptr %gep.A, align 4 + store i32 0, ptr %gep.B, align 4 + %cntable.c.1 = icmp ult i64 %iv, 1000 + %iv.next = add nuw nsw i64 %iv, 1 + br i1 %cntable.c.1, label %b2, label %e.1 + +b2: + %uncntable.c.0 = icmp eq i32 %l, 0 + br i1 %uncntable.c.0, label %e.2, label %b3 + +b3: + %cntable.c.2 = icmp eq i64 %iv.next, 500 + br i1 %cntable.c.2, label %cleanup4, label %latch + +latch: + br label %loop.header + +cleanup4: + ret void + +e.1: + ret void + +e.2: + ret void +} + define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree { ; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small' ; CHECK-NEXT: loop.header: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll new file mode 100644 index 0000000..6eed0ec --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s + +; unsigned long long s0 = 0, s1 = 0; +; for (int i = 0; i < 100; i++) { +; if (i % 4 == 0) { +; A[s0] = 2; // A[0], A[4], A[8], A[12], ... +; A[s1] = 1; // A[0], A[8], A[16], A[24], ... +; } +; s0 += (1ULL << 62) + 1; +; s1 += (1ULL << 62) + 2; +; } +; FIXME: We cannot use inbounds on idx.0, idx.1 to infer no-wrap (and determine +; there are no dependences), as the pointers are not dereferenced in all loop iterations. +define void @test_inbounds_gep_used_in_predicated_block(ptr %A, i64 %n) { +; CHECK-LABEL: 'test_inbounds_gep_used_in_predicated_block' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %offset.0 = phi i64 [ 0, %entry ], [ %offset.0.next, %loop.latch ] + %offset.1 = phi i64 [ 0, %entry ], [ %offset.1.next, %loop.latch ] + %idx.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0 + %idx.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1 + %mask = and i64 %i, 3 + %cond = icmp eq i64 %mask, 0 + br i1 %cond, label %if.then, label %loop.latch + +if.then: + store i8 2, ptr %idx.0 + store i8 1, ptr %idx.1 + br label %loop.latch + +loop.latch: + %i.next = add nuw nsw i64 %i, 1 + %offset.0.next = add i64 %offset.0, 4611686018427387905 ; 2^62 + 1 + %offset.1.next = add i64 %offset.1, 4611686018427387906 ; 2^62 + 2 + %cond.exit = icmp eq i64 %i.next, 100 + br i1 %cond.exit, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_header_existing(ptr %src, ptr %dst, i64 %start) { +; CHECK-LABEL: 'test_header_existing' +; CHECK-NEXT: loop.header: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: ptr %dst +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.src = getelementptr nusw { i8, i8, i32 }, ptr %src, i64 %iv.next +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %dst High: (1 + %dst)) +; CHECK-NEXT: Member: %dst +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: (8 + (8 * %start) + %src) High: (809 + %src)) +; CHECK-NEXT: Member: {(8 + (8 * %start) + %src),+,8}<%loop.header> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop.latch ] + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr nusw { i8, i8, i32 }, ptr %src, i64 %iv.next + %l = load i8, ptr %gep.src, align 1 + store i8 %l, ptr %dst, align 1 + br label %loop.header + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll new file mode 100644 index 0000000..f41cdc6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +define i32 @mask_pair(i32 %x, i32 %y) { +; CHECK-SD-LABEL: mask_pair: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr w8, w0, w1 +; CHECK-SD-NEXT: lsl w0, w8, w1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-GI-NEXT: lsl w8, w8, w1 +; CHECK-GI-NEXT: and w0, w8, w0 +; CHECK-GI-NEXT: ret + %shl = shl nsw i32 -1, %y + %and = and i32 %shl, %x + ret i32 %and +} + +define i64 @mask_pair_64(i64 %x, i64 %y) { +; CHECK-SD-LABEL: mask_pair_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr x8, x0, x1 +; CHECK-SD-NEXT: lsl x0, x8, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-GI-NEXT: lsl x8, x8, x1 +; CHECK-GI-NEXT: and x0, x8, x0 +; CHECK-GI-NEXT: ret + %shl = shl nsw i64 -1, %y + %and = and i64 %shl, %x + ret i64 %and +} + +define i128 @mask_pair_128(i128 %x, i128 %y) { +; CHECK-SD-LABEL: mask_pair_128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-SD-NEXT: lsl x8, x8, x2 +; CHECK-SD-NEXT: lsr x9, x10, x9 +; CHECK-SD-NEXT: tst x2, #0x40 +; CHECK-SD-NEXT: orr x9, x8, x9 +; CHECK-SD-NEXT: csel x9, x8, x9, ne +; CHECK-SD-NEXT: csel x8, xzr, x8, ne +; CHECK-SD-NEXT: and x0, x8, x0 +; CHECK-SD-NEXT: and x1, x9, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair_128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff +; CHECK-GI-NEXT: sub x10, x2, #64 +; CHECK-GI-NEXT: sub x8, x8, x2 +; CHECK-GI-NEXT: lsl x11, x9, x2 +; CHECK-GI-NEXT: cmp x2, #64 +; CHECK-GI-NEXT: lsr x8, x9, x8 +; CHECK-GI-NEXT: lsl x9, x9, x10 +; CHECK-GI-NEXT: csel x10, x11, xzr, lo +; CHECK-GI-NEXT: orr x8, x8, x11 +; CHECK-GI-NEXT: and x0, x10, x0 +; CHECK-GI-NEXT: csel x8, x8, x9, lo +; CHECK-GI-NEXT: cmp x2, #0 +; CHECK-GI-NEXT: csinv x8, x8, xzr, ne +; CHECK-GI-NEXT: and x1, x8, x1 +; CHECK-GI-NEXT: ret + %shl = shl nsw i128 -1, %y + %and = and i128 %shl, %x + ret i128 %and +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir new file mode 100644 index 0000000..7deea56 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir @@ -0,0 +1,260 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp -mcp-use-is-copy-instr | FileCheck %s + +--- +name: cbz_wzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cbz_wzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBZW $wzr, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $w8 = ORRWrs $wzr, $wzr, 0 + CBZW killed renamable $w8, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: cbnz_wzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cbnz_wzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZW $wzr, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $w8 = ORRWrs $wzr, $wzr, 0 + CBNZW killed renamable $w8, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: tbz_wzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: tbz_wzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBZW $wzr, 0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $w8 = ORRWrs $wzr, $wzr, 0 + TBZW killed renamable $w8, 0, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: tbnz_wzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: tbnz_wzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBNZW $wzr, 0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $w8 = ORRWrs $wzr, $wzr, 0 + TBNZW killed renamable $w8, 0, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... + +--- +name: cbz_xzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cbz_xzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBZX $xzr, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $x8 = ORRXrs $xzr, $xzr, 0 + CBZX killed renamable $x8, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: cbnz_xzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cbnz_xzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZX $xzr, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $x8 = ORRXrs $xzr, $xzr, 0 + CBNZX killed renamable $x8, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: tbz_xzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: tbz_xzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBZX $xzr, 0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $x8 = ORRXrs $xzr, $xzr, 0 + TBZX killed renamable $x8, 0, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... +--- +name: tbnz_xzr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: tbnz_xzr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBNZX $xzr, 0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $w0 = MOVZWi 10, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $w0 = MOVZWi 20, 0 + ; CHECK-NEXT: RET undef $lr, implicit $w0 + bb.0: + liveins: $x0 + + $x8 = ORRXrs $xzr, $xzr, 0 + TBNZX killed renamable $x8, 0, %bb.2 + + bb.1: + $w0 = MOVZWi 10, 0 + RET undef $lr, implicit $w0 + + bb.2: + $w0 = MOVZWi 20, 0 + RET undef $lr, implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index 8e822d1..5a96116 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: sub w8, w8, w2 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: mov w9, #32 // =0x20 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: neg w9, w2 -; CHECK-NEXT: mov w10, #-1 // =0xffffffff -; CHECK-NEXT: lsr w9, w10, w9 ; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %shifted = lshr i32 %val, %numskipbits @@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: mov w9, #32 // =0x20 -; CHECK-NEXT: mov w10, #-1 // =0xffffffff ; CHECK-NEXT: sub w9, w9, w2 ; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %skip = zext i8 %numskipbits to i32 @@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w10, w8 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: sub w8, w8, w2 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: mov w9, #64 // =0x40 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: neg x9, x2 -; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsr x9, x10, x9 ; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %shifted = lshr i64 %val, %numskipbits @@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; CHECK-LABEL: bextr64_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: mov w9, #64 // =0x40 -; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff ; CHECK-NEXT: sub w9, w9, w2 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: lsr x9, x10, x9 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %skip = zext i8 %numskipbits to i64 @@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x10, x8 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_c1: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 @@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_c2: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll index 4b8f3e8..368440c 100644 --- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll +++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll @@ -347,10 +347,9 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -362,10 +361,9 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c1_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -377,11 +375,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: ldr w10, [x0] -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %numhighbits = sub i32 32, %numlowbits @@ -394,11 +391,10 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: ldr w10, [x0] +; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsl w9, w9, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %numhighbits = sub i8 32, %numlowbits @@ -411,10 +407,9 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -427,10 +422,9 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -442,10 +436,9 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c1_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -457,11 +450,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: ldr x10, [x0] -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %numhighbits = sub i64 64, %numlowbits @@ -474,11 +466,10 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr x10, [x0] +; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsl x9, x9, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, ptr %w %numhighbits = sub i8 64, %numlowbits @@ -491,10 +482,9 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll index d60c870..4287507 100644 --- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll @@ -1257,21 +1257,55 @@ entry: } define <4 x i32> @partial_reduce_shl_sext_const_rhs6(<16 x i8> %l, <4 x i32> %part) { -; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs6: +; CHECK-NODOT-LABEL: partial_reduce_shl_sext_const_rhs6: +; CHECK-NODOT: // %bb.0: +; CHECK-NODOT-NEXT: sshll v2.8h, v0.8b, #0 +; CHECK-NODOT-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-NODOT-NEXT: sshll v3.4s, v0.4h, #6 +; CHECK-NODOT-NEXT: sshll2 v4.4s, v2.8h, #6 +; CHECK-NODOT-NEXT: sshll v2.4s, v2.4h, #6 +; CHECK-NODOT-NEXT: sshll2 v0.4s, v0.8h, #6 +; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NODOT-NEXT: add v2.4s, v4.4s, v3.4s +; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NODOT-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NODOT-NEXT: ret +; +; CHECK-DOT-LABEL: partial_reduce_shl_sext_const_rhs6: +; CHECK-DOT: // %bb.0: +; CHECK-DOT-NEXT: movi v2.16b, #64 +; CHECK-DOT-NEXT: sdot v1.4s, v0.16b, v2.16b +; CHECK-DOT-NEXT: mov v0.16b, v1.16b +; CHECK-DOT-NEXT: ret +; +; CHECK-DOT-I8MM-LABEL: partial_reduce_shl_sext_const_rhs6: +; CHECK-DOT-I8MM: // %bb.0: +; CHECK-DOT-I8MM-NEXT: movi v2.16b, #64 +; CHECK-DOT-I8MM-NEXT: sdot v1.4s, v0.16b, v2.16b +; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b +; CHECK-DOT-I8MM-NEXT: ret + %ext = sext <16 x i8> %l to <16 x i32> + %shift = shl nsw <16 x i32> %ext, splat (i32 6) + %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift) + ret <4 x i32> %red +} + +define <4 x i32> @partial_reduce_shl_sext_const_rhs7(<16 x i8> %l, <4 x i32> %part) { +; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs7: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0 ; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0 -; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #6 -; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #6 -; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #6 -; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #6 +; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #7 +; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #7 +; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #7 +; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #7 ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-COMMON-NEXT: ret %ext = sext <16 x i8> %l to <16 x i32> - %shift = shl nsw <16 x i32> %ext, splat (i32 6) + %shift = shl nsw <16 x i32> %ext, splat (i32 7) %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift) ret <4 x i32> %red } @@ -1331,19 +1365,33 @@ define <4 x i32> @partial_reduce_shl_sext_non_const_rhs(<16 x i8> %l, <4 x i32> } define <4 x i32> @partial_reduce_shl_zext_const_rhs6(<16 x i8> %l, <4 x i32> %part) { -; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs6: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0 -; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #6 -; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #6 -; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #6 -; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #6 -; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s -; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-COMMON-NEXT: ret +; CHECK-NODOT-LABEL: partial_reduce_shl_zext_const_rhs6: +; CHECK-NODOT: // %bb.0: +; CHECK-NODOT-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NODOT-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-NODOT-NEXT: ushll v3.4s, v0.4h, #6 +; CHECK-NODOT-NEXT: ushll2 v4.4s, v2.8h, #6 +; CHECK-NODOT-NEXT: ushll v2.4s, v2.4h, #6 +; CHECK-NODOT-NEXT: ushll2 v0.4s, v0.8h, #6 +; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NODOT-NEXT: add v2.4s, v4.4s, v3.4s +; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NODOT-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NODOT-NEXT: ret +; +; CHECK-DOT-LABEL: partial_reduce_shl_zext_const_rhs6: +; CHECK-DOT: // %bb.0: +; CHECK-DOT-NEXT: movi v2.16b, #64 +; CHECK-DOT-NEXT: udot v1.4s, v0.16b, v2.16b +; CHECK-DOT-NEXT: mov v0.16b, v1.16b +; CHECK-DOT-NEXT: ret +; +; CHECK-DOT-I8MM-LABEL: partial_reduce_shl_zext_const_rhs6: +; CHECK-DOT-I8MM: // %bb.0: +; CHECK-DOT-I8MM-NEXT: movi v2.16b, #64 +; CHECK-DOT-I8MM-NEXT: udot v1.4s, v0.16b, v2.16b +; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b +; CHECK-DOT-I8MM-NEXT: ret %ext = zext <16 x i8> %l to <16 x i32> %shift = shl nsw <16 x i32> %ext, splat (i32 6) %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift) diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll index 4a04934..6946cc2 100644 --- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll +++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64 -O3 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -O3 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare void @t() @@ -581,3 +581,323 @@ end: ret void } +define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) { +; CHECK-SD-LABEL: tbnz_wzr: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tbz w0, #0, .LBB20_2 +; CHECK-SD-NEXT: // %bb.1: +; CHECK-SD-NEXT: tbnz wzr, #0, .LBB20_3 +; CHECK-SD-NEXT: b .LBB20_4 +; CHECK-SD-NEXT: .LBB20_2: // %opnfil.exit.thread +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: tbz w8, #0, .LBB20_4 +; CHECK-SD-NEXT: .LBB20_3: // %if.else25 +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: .LBB20_4: // %common.ret +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbnz_wzr: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #0 // =0x0 +; CHECK-GI-NEXT: tbz w0, #0, .LBB20_3 +; CHECK-GI-NEXT: // %bb.1: // %if.end10 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB20_4 +; CHECK-GI-NEXT: .LBB20_2: // %common.ret +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB20_3: // %opnfil.exit.thread +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: tbz w8, #0, .LBB20_2 +; CHECK-GI-NEXT: .LBB20_4: // %if.else25 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +entry: + br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread + +opnfil.exit.thread: ; preds = %entry + store i32 0, ptr %locflg, align 4 + br label %if.end10 + +if.end10: ; preds = %opnfil.exit.thread, %entry + %cmp5 = phi i1 [ true, %opnfil.exit.thread ], [ false, %entry ] + br i1 %cmp5, label %if.else25, label %if.then12 + +if.then12: ; preds = %if.end10 + %call20 = load i32, ptr null, align 4 + br label %if.end26 + +if.else25: ; preds = %if.end10 + store i32 0, ptr %locflg, align 4 + br label %if.end26 + +if.end26: ; preds = %if.else25, %if.then12 + br i1 %cmp5, label %common.ret, label %if.then28 + +common.ret: ; preds = %if.then28, %if.end26 + %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ] + ret ptr %common.ret.op + +if.then28: ; preds = %if.end26 + %0 = load ptr, ptr null, align 8 + br label %common.ret +} + +define ptr @tbz_wzr(i1 %cmp1.not.i, ptr %locflg) { +; CHECK-SD-LABEL: tbz_wzr: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tbz w0, #0, .LBB21_2 +; CHECK-SD-NEXT: // %bb.1: +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: tbnz w8, #0, .LBB21_3 +; CHECK-SD-NEXT: b .LBB21_4 +; CHECK-SD-NEXT: .LBB21_2: // %opnfil.exit.thread +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: tbz wzr, #0, .LBB21_4 +; CHECK-SD-NEXT: .LBB21_3: // %if.else25 +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: .LBB21_4: // %common.ret +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbz_wzr: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: tbz w0, #0, .LBB21_3 +; CHECK-GI-NEXT: // %bb.1: // %if.end10 +; CHECK-GI-NEXT: tbnz w8, #0, .LBB21_4 +; CHECK-GI-NEXT: .LBB21_2: // %common.ret +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB21_3: // %opnfil.exit.thread +; CHECK-GI-NEXT: mov w8, #0 // =0x0 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: tbz w8, #0, .LBB21_2 +; CHECK-GI-NEXT: .LBB21_4: // %if.else25 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +entry: + br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread + +opnfil.exit.thread: ; preds = %entry + store i32 0, ptr %locflg, align 4 + br label %if.end10 + +if.end10: ; preds = %opnfil.exit.thread, %entry + %cmp5 = phi i1 [ false, %opnfil.exit.thread ], [ true, %entry ] + br i1 %cmp5, label %if.else25, label %if.then12 + +if.then12: ; preds = %if.end10 + %call20 = load i32, ptr null, align 4 + br label %if.end26 + +if.else25: ; preds = %if.end10 + store i32 0, ptr %locflg, align 4 + br label %if.end26 + +if.end26: ; preds = %if.else25, %if.then12 + br i1 %cmp5, label %common.ret, label %if.then28 + +common.ret: ; preds = %if.then28, %if.end26 + %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ] + ret ptr %common.ret.op + +if.then28: ; preds = %if.end26 + %0 = load ptr, ptr null, align 8 + br label %common.ret +} + +define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) { +; CHECK-SD-LABEL: cbnz_wzr: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tbz w0, #0, .LBB22_2 +; CHECK-SD-NEXT: // %bb.1: +; CHECK-SD-NEXT: cbnz wzr, .LBB22_3 +; CHECK-SD-NEXT: b .LBB22_4 +; CHECK-SD-NEXT: .LBB22_2: // %opnfil.exit.thread +; CHECK-SD-NEXT: mov w8, #10 // =0xa +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: cbz w8, .LBB22_4 +; CHECK-SD-NEXT: .LBB22_3: // %if.else25 +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: .LBB22_4: // %common.ret +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cbnz_wzr: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: tbnz w0, #0, .LBB22_2 +; CHECK-GI-NEXT: // %bb.1: // %opnfil.exit.thread +; CHECK-GI-NEXT: mov w8, #10 // =0xa +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: .LBB22_2: // %if.end10 +; CHECK-GI-NEXT: cbz w8, .LBB22_4 +; CHECK-GI-NEXT: // %bb.3: // %if.else25 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: .LBB22_4: // %common.ret +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +entry: + br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread + +opnfil.exit.thread: ; preds = %entry + store i32 0, ptr %locflg, align 4 + br label %if.end10 + +if.end10: ; preds = %opnfil.exit.thread, %entry + %cmp5 = phi i32 [ 10, %opnfil.exit.thread ], [ 0, %entry ] + %cmp5b = icmp ne i32 %cmp5, 0 + br i1 %cmp5b, label %if.else25, label %if.then12 + +if.then12: ; preds = %if.end10 + %call20 = load i32, ptr null, align 4 + br label %if.end26 + +if.else25: ; preds = %if.end10 + store i32 0, ptr %locflg, align 4 + br label %if.end26 + +if.end26: ; preds = %if.else25, %if.then12 + br i1 %cmp5b, label %common.ret, label %if.then28 + +common.ret: ; preds = %if.then28, %if.end26 + %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ] + ret ptr %common.ret.op + +if.then28: ; preds = %if.end26 + %0 = load ptr, ptr null, align 8 + br label %common.ret +} + +define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) { +; CHECK-SD-LABEL: cbz_wzr: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: tbz w0, #0, .LBB23_2 +; CHECK-SD-NEXT: // %bb.1: +; CHECK-SD-NEXT: mov w8, #10 // =0xa +; CHECK-SD-NEXT: cbnz w8, .LBB23_3 +; CHECK-SD-NEXT: b .LBB23_4 +; CHECK-SD-NEXT: .LBB23_2: // %opnfil.exit.thread +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: cbz wzr, .LBB23_4 +; CHECK-SD-NEXT: .LBB23_3: // %if.else25 +; CHECK-SD-NEXT: str wzr, [x1] +; CHECK-SD-NEXT: .LBB23_4: // %common.ret +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cbz_wzr: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #10 // =0xa +; CHECK-GI-NEXT: tbnz w0, #0, .LBB23_2 +; CHECK-GI-NEXT: // %bb.1: // %opnfil.exit.thread +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: .LBB23_2: // %if.end10 +; CHECK-GI-NEXT: cbz w8, .LBB23_4 +; CHECK-GI-NEXT: // %bb.3: // %if.else25 +; CHECK-GI-NEXT: str wzr, [x1] +; CHECK-GI-NEXT: .LBB23_4: // %common.ret +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: ret +entry: + br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread + +opnfil.exit.thread: ; preds = %entry + store i32 0, ptr %locflg, align 4 + br label %if.end10 + +if.end10: ; preds = %opnfil.exit.thread, %entry + %cmp5 = phi i32 [ 0, %opnfil.exit.thread ], [ 10, %entry ] + %cmp5b = icmp ne i32 %cmp5, 0 + br i1 %cmp5b, label %if.else25, label %if.then12 + +if.then12: ; preds = %if.end10 + %call20 = load i32, ptr null, align 4 + br label %if.end26 + +if.else25: ; preds = %if.end10 + store i32 0, ptr %locflg, align 4 + br label %if.end26 + +if.end26: ; preds = %if.else25, %if.then12 + br i1 %cmp5b, label %common.ret, label %if.then28 + +common.ret: ; preds = %if.then28, %if.end26 + %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ] + ret ptr %common.ret.op + +if.then28: ; preds = %if.end26 + %0 = load ptr, ptr null, align 8 + br label %common.ret +} + +define i1 @avifSequenceHeaderParse() { +; CHECK-SD-LABEL: avifSequenceHeaderParse: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: cbz w8, .LBB24_2 +; CHECK-SD-NEXT: .LBB24_1: // %bb6 +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB24_2: // %bb1 +; CHECK-SD-NEXT: cbz w8, .LBB24_4 +; CHECK-SD-NEXT: // %bb.3: +; CHECK-SD-NEXT: tbz xzr, #63, .LBB24_1 +; CHECK-SD-NEXT: b .LBB24_5 +; CHECK-SD-NEXT: .LBB24_4: // %bb2 +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: tbz x8, #63, .LBB24_1 +; CHECK-SD-NEXT: .LBB24_5: // %bb4 +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: avifSequenceHeaderParse: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: ret +entry: + %a = icmp slt i64 0, 0 + br i1 %a, label %bb1, label %bb6 + +bb1: ; preds = %entry + %b = icmp eq i32 1, 0 + br i1 %b, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %c = load i8, ptr null, align 1 + %d = zext i8 1 to i64 + %e = shl i64 %d, 0 + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + %f = phi i64 [ %e, %bb2 ], [ 0, %bb1 ] + %g = icmp slt i64 %f, 0 + br i1 %g, label %bb4, label %bb6 + +bb4: ; preds = %bb3 + %h = icmp eq i32 1, 0 + br i1 %h, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %i = load i8, ptr null, align 1 + %j = shl i64 0, 0 + br label %bb7 + +bb6: ; preds = %bb7, %bb3, %entry + %k = phi i1 [ false, %bb7 ], [ false, %bb3 ], [ false, %entry ] + ret i1 %k + +bb7: ; preds = %bb5, %bb4 + %l = phi ptr [ inttoptr (i64 1 to ptr), %bb5 ], [ null, %bb4 ] + %m = phi i64 [ %j, %bb5 ], [ 0, %bb4 ] + %n = icmp ult ptr %l, null + br label %bb6 +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll new file mode 100644 index 0000000..0de7f8f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-precise-allocate-to-module-struct.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s + +; Regression test for issue 160181 +; One variable is chosen to be assigned at zero. Here, that's @both +; Then other variables should be allocated at fixed offsets from that provided +; they are allocated by all the other kernels that presently allocate the +; variable at address zero. +; The failure mode was in that second check - variables could be added to +; the module scope zero address struct even when some of the kernels allocating +; that struct do not need the additional variable. + +; With current llvm, all three of these integers are put in the module scope struct, when +; neither kern_one or kern_two access all three. + +@both = addrspace(3) global i32 poison +@both_second = addrspace(3) global i16 poison ; a second field in the module struct +@one = addrspace(3) global i32 poison +@two = addrspace(3) global i32 poison + + +;. +; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata" +;. +define void @func_one() { +; CHECK-LABEL: define {{[^@]+}}@func_one() { +; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1:![0-9]+]] +; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18:![0-9]+]] +; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23:![0-9]+]] +; CHECK-NEXT: ret void +; + %val0 = load i32, ptr addrspace(3) @both + store i32 %val0, ptr addrspace(3) @one + store i16 10, ptr addrspace(3) @both_second + ret void +} + +define amdgpu_kernel void @kern_one() { +; CHECK-LABEL: define {{[^@]+}}@kern_one +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META24:![0-9]+]] +; CHECK-NEXT: call void @func_one() +; CHECK-NEXT: ret void +; +entry: + call void @func_one() + ret void +} + +define void @func_two() { +; CHECK-LABEL: define {{[^@]+}}@func_two() { +; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]] +; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25:![0-9]+]] +; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]] +; CHECK-NEXT: ret void +; + %val0 = load i32, ptr addrspace(3) @both + store i32 %val0, ptr addrspace(3) @two + store i16 20, ptr addrspace(3) @both_second + ret void +} + +define amdgpu_kernel void @kern_two() { +; CHECK-LABEL: define {{[^@]+}}@kern_two +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META26:![0-9]+]], !noalias [[META27:![0-9]+]] +; CHECK-NEXT: call void @func_two() +; CHECK-NEXT: ret void +; +entry: + call void @func_two() + ret void +} + +; Unrelated to the bug at hand, but if a variable is only +; reachable from a single kernel, it gets allocated to a fixed +; address independent of the module scope struct. This kernel +; means the key variables miss that optimisation while @both +; remains the best candidate for address zero allocation. +define void @func_block_direct_allocation() { +; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() { +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18]] +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25]] +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]] +; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]] +; CHECK-NEXT: ret void +; + %val1 = load i32, ptr addrspace(3) @one + %val2 = load i32, ptr addrspace(3) @two + %sum = add i32 %val1, %val2 + store i32 %sum, ptr addrspace(3) @both + store i16 30, ptr addrspace(3) @both_second + ret void +} + +define amdgpu_kernel void @kern_block_direct_allocation() { +; CHECK-LABEL: define {{[^@]+}}@kern_block_direct_allocation +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] +; CHECK-NEXT: call void @func_block_direct_allocation() +; CHECK-NEXT: call void @func_one() +; CHECK-NEXT: call void @func_two() +; CHECK-NEXT: ret void +; + call void @func_block_direct_allocation() + call void @func_one() + call void @func_two() + ret void +} +;. +; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="16" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]], [[META16:![0-9]+]], [[META17:![0-9]+]]} +; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]]} +; CHECK: [[META3]] = distinct !{[[META3]]} +; CHECK: [[META4]] = distinct !{[[META4]], [[META3]]} +; CHECK: [[META5]] = distinct !{[[META5]], [[META3]]} +; CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]]} +; CHECK: [[META7]] = distinct !{[[META7]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META7]]} +; CHECK: [[META9]] = distinct !{[[META9]], [[META7]]} +; CHECK: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]]} +; CHECK: [[META11]] = distinct !{[[META11]]} +; CHECK: [[META12]] = distinct !{[[META12]], [[META11]]} +; CHECK: [[META13]] = distinct !{[[META13]], [[META11]]} +; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]} +; CHECK: [[META15]] = distinct !{[[META15]]} +; CHECK: [[META16]] = distinct !{[[META16]], [[META15]]} +; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]} +; CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META2]], [[META5]], [[META20:![0-9]+]], [[META6]], [[META9]], [[META21:![0-9]+]], [[META10]], [[META13]], [[META22:![0-9]+]], [[META14]], [[META17]]} +; CHECK: [[META19]] = distinct !{[[META19]], [[META3]]} +; CHECK: [[META20]] = distinct !{[[META20]], [[META7]]} +; CHECK: [[META21]] = distinct !{[[META21]], [[META11]]} +; CHECK: [[META22]] = distinct !{[[META22]], [[META15]]} +; CHECK: [[META23]] = !{[[META19]], [[META4]], [[META5]], [[META20]], [[META8]], [[META9]], [[META21]], [[META12]], [[META13]], [[META22]], [[META16]], [[META17]]} +; CHECK: [[META24]] = !{[[META10]], [[META12]], [[META13]], [[META14]], [[META16]], [[META17]]} +; CHECK: [[META25]] = !{[[META19]], [[META2]], [[META4]], [[META20]], [[META6]], [[META8]], [[META21]], [[META10]], [[META12]], [[META22]], [[META14]], [[META16]]} +; CHECK: [[META26]] = !{[[META22]]} +; CHECK: [[META27]] = !{[[META14]], [[META16]], [[META17]]} +;. diff --git a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll index 7b1331f..f44ad2a 100644 --- a/llvm/test/CodeGen/ARM/inline-asm-clobber.ll +++ b/llvm/test/CodeGen/ARM/inline-asm-clobber.ll @@ -6,12 +6,19 @@ ; RUN: llc <%s -mtriple=arm-none-eabi --frame-pointer=all 2>&1 \ ; RUN: | FileCheck %s -check-prefix=NO_FP_ELIM +; RUN: llc <%s -mtriple=armv6-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2 +; RUN: llc <%s -mtriple=armv6k-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS2 +; RUN: llc <%s -mtriple=armv6k-apple-ios3 2>&1 | FileCheck %s -check-prefix=IOS3 +; RUN: llc <%s -mtriple=armv7-apple-ios2 2>&1 | FileCheck %s -check-prefix=IOS3 + ; CHECK: warning: inline asm clobber list contains reserved registers: SP, PC ; CHECK: warning: inline asm clobber list contains reserved registers: R11 ; RWPI: warning: inline asm clobber list contains reserved registers: R9, SP, PC ; RWPI: warning: inline asm clobber list contains reserved registers: R11 ; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11, SP, PC ; NO_FP_ELIM: warning: inline asm clobber list contains reserved registers: R11 +; IOS2: warning: inline asm clobber list contains reserved registers: R9, SP, PC +; IOS3: warning: inline asm clobber list contains reserved registers: SP, PC define void @foo() nounwind { call void asm sideeffect "mov r7, #1", diff --git a/llvm/test/CodeGen/RISCV/xqcics.ll b/llvm/test/CodeGen/RISCV/xqcics.ll index 5b7ca9e7..60fc98c 100644 --- a/llvm/test/CodeGen/RISCV/xqcics.ll +++ b/llvm/test/CodeGen/RISCV/xqcics.ll @@ -690,3 +690,127 @@ entry: ret i32 %sel } +define i32 @select_cc_example_eq1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a1, a0, .LBB21_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB21_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_eq1: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_eq1: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.line a2, a1, a0, 11 +; RV32IXQCI-NEXT: mv a0, a2 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp eq i32 %b, %a + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ne1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a1, a0, .LBB22_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB22_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_ne1: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_ne1: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.lieq a2, a1, a0, 11 +; RV32IXQCI-NEXT: mv a0, a2 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp ne i32 %b, %a + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + + +define i32 @select_cc_example_eq2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a1, a0, .LBB23_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: li a0, 15 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_eq2: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_eq2: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp eq i32 %b, %a + %sel = select i1 %cmp, i32 15, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ne2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a1, a0, .LBB24_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: li a0, 15 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_ne2: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_ne2: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp ne i32 %b, %a + %sel = select i1 %cmp, i32 15, i32 11 + ret i32 %sel +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll index e6d3a4b..4d4fc1b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" @@ -9,12 +10,36 @@ declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @foo(i32 %guard, ...) { -; CHECK-LABEL: @foo -; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP1]] -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP1]], i1 false) -; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 ; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) @@ -27,11 +52,22 @@ define i32 @foo(i32 %guard, ...) { ;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ;; array. define i32 @bar() { -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 @@ -40,15 +76,28 @@ define i32 @bar() { ;; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) define i32 @bar2() { -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } +; UTC_ARGS: --disable + ;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ;; passed to a variadic function. declare i64 @sum(i64 %n, ...) diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll index 69a74a3..9f3f10e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll @@ -1,9 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" target triple = "mips64--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,23 +44,29 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. - -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } @@ -36,23 +74,32 @@ define i32 @bar() { ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. The first argument is stored at position 4, since it's right ; justified. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll index b19da8e..41fb975 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll @@ -1,9 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" target triple = "mips64el--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,46 +44,60 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. - -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll index 9351067..19b07e1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll @@ -1,9 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,23 +50,29 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. - -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } @@ -36,14 +80,22 @@ define i32 @bar() { ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. The first argument is stored at position 4, since it's right ; justified. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check vector argument. define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) ret i32 %1 } @@ -51,50 +103,110 @@ define i32 @bar2() { ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls ; corresponds to offset 8+ of parameter save area - so the offset from ; __msan_va_arg_tls is actually misaligned. -; CHECK-LABEL: @bar2 -; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i64 array. define i32 @bar4() { +; CHECK-LABEL: define i32 @bar4() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) ret i32 %1 } -; CHECK-LABEL: @bar4 -; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i128 array. define i32 @bar5() { +; CHECK-LABEL: define i32 @bar5() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) ret i32 %1 } -; CHECK-LABEL: @bar5 -; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 8-aligned byval. define i32 @bar6(ptr %arg) { +; CHECK-LABEL: define i32 @bar6( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false) +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg) ret i32 %1 } -; CHECK-LABEL: @bar6 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false) -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 16-aligned byval. define i32 @bar7(ptr %arg) { +; CHECK-LABEL: define i32 @bar7( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg) ret i32 %1 } -; CHECK-LABEL: @bar7 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false) -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll index 4151f3b..1fe6385 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll @@ -1,9 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,37 +50,51 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. - -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check vector argument. define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) ret i32 %1 } @@ -50,49 +102,110 @@ define i32 @bar2() { ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls ; corresponds to offset 8+ of parameter save area - so the offset from ; __msan_va_arg_tls is actually misaligned. -; CHECK-LABEL: @bar2 -; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i64 array. define i32 @bar4() { +; CHECK-LABEL: define i32 @bar4() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) ret i32 %1 } -; CHECK-LABEL: @bar4 -; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i128 array. define i32 @bar5() { +; CHECK-LABEL: define i32 @bar5() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) ret i32 %1 } -; CHECK-LABEL: @bar5 -; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 8-aligned byval. define i32 @bar6(ptr %arg) { +; CHECK-LABEL: define i32 @bar6( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false) +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg) ret i32 %1 } -; CHECK-LABEL: @bar6 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false) -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 16-aligned byval. define i32 @bar7(ptr %arg) { +; CHECK-LABEL: define i32 @bar7( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg) ret i32 %1 } -; CHECK-LABEL: @bar7 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false) -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls + +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll index 4b7a910..a7209de 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; KMSAN instrumentation tests ; RUN: opt < %s -msan-kernel=1 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK @@ -6,309 +7,495 @@ target triple = "x86_64-unknown-linux-gnu" ; Check the instrumentation prologue. define void @Empty() nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Empty( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: ret void +; entry: ret void } -; CHECK-LABEL: @Empty -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; %param_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 0 -; %retval_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 1 -; %va_arg_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 2 -; %va_arg_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 3 -; %va_arg_overflow_size: -; CHECK: getelementptr {{.*}} i32 0, i32 4 -; %param_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 5 -; %retval_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 6 - ; Check instrumentation of stores - define void @Store1(ptr nocapture %p, i8 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store1( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i8 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i8 [[TMP7]], ptr [[TMP14]], align 1 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i8 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; entry: store i8 %x, ptr %p ret void } -; CHECK-LABEL: @Store1 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: [[BASE:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; CHECK: [[SHADOW_PTR:%[a-z0-9_]+]] = inttoptr {{.*}} [[BASE]] -; CHECK: [[SHADOW:%[a-z0-9]+]] = load i64, ptr [[SHADOW_PTR]] -; CHECK: [[BASE2:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: icmp ne i64 [[SHADOW]] -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_1(ptr %p) -; CHECK: store i8 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i8 -; CHECK: ret void - define void @Store2(ptr nocapture %p, i16 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store2( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i16 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_2(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP14]], align 2 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i16 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i16 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: ret void +; entry: store i16 %x, ptr %p ret void } -; CHECK-LABEL: @Store2 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_2(ptr %p) -; CHECK: store i16 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i16 -; CHECK: ret void - - define void @Store4(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store4( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 4 +; CHECK-NEXT: ret void +; entry: store i32 %x, ptr %p ret void } -; CHECK-LABEL: @Store4 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i32 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_4(ptr %p) -; CHECK: store i32 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i32 -; CHECK: ret void - define void @Store8(ptr nocapture %p, i64 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store8( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB21:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP15]], align 8 +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: store i64 [[X]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; entry: store i64 %x, ptr %p ret void } -; CHECK-LABEL: @Store8 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_8(ptr %p) -; CHECK: store i64 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i64 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i64 -; CHECK: ret void - define void @Store16(ptr nocapture %p, i128 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store16( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i128 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_n(ptr [[P]], i64 16) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i128 [[TMP7]], ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB22:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP15]], i32 1 +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP21]], align 8 +; CHECK-NEXT: br label %[[BB22]] +; CHECK: [[BB22]]: +; CHECK-NEXT: store i128 [[X]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; entry: store i128 %x, ptr %p ret void } -; CHECK-LABEL: @Store16 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_n(ptr %p, i64 16) -; CHECK: store i128 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i64 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i128 -; CHECK: ret void - - ; Check instrumentation of loads define i8 @Load1(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i8 @Load1( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_1(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i8 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i8 [[TMP7]] +; entry: %0 = load i8, ptr %p ret i8 %0 } -; CHECK-LABEL: @Load1 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i8 -; CHECK: @__msan_metadata_ptr_for_load_1(ptr %p) -; Load the shadow and origin. -; CHECK: load i8 -; CHECK: load i32 - - define i16 @Load2(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i16 @Load2( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[P]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_2(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP9]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i16 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i16 [[TMP7]] +; entry: %0 = load i16, ptr %p ret i16 %0 } -; CHECK-LABEL: @Load2 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i16 -; CHECK: @__msan_metadata_ptr_for_load_2(ptr %p) -; Load the shadow and origin. -; CHECK: load i16 -; CHECK: load i32 - - define i32 @Load4(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i32 @Load4( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i32 [[TMP7]] +; entry: %0 = load i32, ptr %p ret i32 %0 } -; CHECK-LABEL: @Load4 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i32 -; CHECK: @__msan_metadata_ptr_for_load_4(ptr %p) -; Load the shadow and origin. -; CHECK: load i32 -; CHECK: load i32 - define i64 @Load8(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i64 @Load8( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i64 [[TMP7]] +; entry: %0 = load i64, ptr %p ret i64 %0 } -; CHECK-LABEL: @Load8 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i64 -; CHECK: @__msan_metadata_ptr_for_load_8(ptr %p) -; Load the shadow and origin. -; CHECK: load i64 -; CHECK: load i32 - define i128 @Load16(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i128 @Load16( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_n(ptr [[P]], i64 16) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i128, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8 +; CHECK-NEXT: store i128 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i128 [[TMP7]] +; entry: %0 = load i128, ptr %p ret i128 %0 } -; CHECK-LABEL: @Load16 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i128 -; CHECK: @__msan_metadata_ptr_for_load_n(ptr %p, i64 16) -; Load the shadow and origin. -; CHECK: load i128 -; CHECK: load i32 - - ; Test kernel-specific va_list instrumentation %struct.__va_list_tag = type { i32, i32, ptr, ptr } @@ -319,6 +506,78 @@ declare dso_local i32 @VAListFn(ptr, ptr) local_unnamed_addr ; Function Attrs: nounwind uwtable define dso_local i32 @VarArgFn(ptr %fmt, ...) local_unnamed_addr sanitize_memory #0 { +; CHECK-LABEL: define dso_local i32 @VarArgFn( +; CHECK-SAME: ptr [[FMT:%.*]], ...) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 48, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[TMP6]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP7]], i8 0, i64 [[TMP6]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP6]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP7]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[TMP8]], i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = alloca i8, i64 [[TMP6]], align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[TMP8]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @__msan_poison_alloca(ptr [[ARGS]], i64 24, ptr @[[GLOB0:[0-9]+]]) +; CHECK-NEXT: [[TMP10:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[ARGS]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 1 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP18]], ptr align 16 [[TMP7]], i64 48, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP19]], ptr align 16 [[TMP9]], i64 48, i1 false) +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP7]], i32 48 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP25]], ptr align 16 [[TMP27]], i64 [[TMP5]], i1 false) +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP9]], i32 48 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP26]], ptr align 16 [[TMP28]], i64 [[TMP5]], i1 false) +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP29]] to ptr +; CHECK-NEXT: store i64 [[TMP2]], ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP30]] to ptr +; CHECK-NEXT: store i32 [[TMP4]], ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], 8 +; CHECK-NEXT: [[_MSARG3:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: store i64 0, ptr [[_MSARG3]], align 8 +; CHECK-NEXT: store i32 0, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @VAListFn(ptr [[FMT]], ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: store i32 [[_MSRET]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP33]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i32 [[CALL]] +; entry: %args = alloca [1 x %struct.__va_list_tag], align 16 call void @llvm.va_start(ptr nonnull %args) @@ -330,52 +589,56 @@ entry: ; Kernel is built without SSE support. attributes #0 = { "target-features"="+fxsr,+x87,-sse" } -; CHECK-LABEL: @VarArgFn -; CHECK: @__msan_get_context_state() -; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2 -; CHECK: [[VA_ARG_ORIGIN:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 3 -; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4 -; CHECK: [[OSIZE:%[0-9]+]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]] ; Register save area is 48 bytes for non-SSE builds. -; CHECK: [[SIZE:%[0-9]+]] = add i64 48, [[OSIZE]] -; CHECK: [[SHADOWS:%[0-9]+]] = alloca i8, i64 [[SIZE]] -; CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[SHADOWS]], i8 0, i64 [[SIZE]], i1 false) -; CHECK: [[COPYSZ:%[0-9]+]] = call i64 @llvm.umin.i64(i64 [[SIZE]], i64 800) -; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[SHADOWS]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[COPYSZ]] -; CHECK: [[ORIGINS:%[0-9]+]] = alloca i8, i64 [[SIZE]] -; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[ORIGINS]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[COPYSZ]] -; CHECK: call i32 @VAListFn ; Function Attrs: nounwind uwtable define dso_local void @VarArgCaller() local_unnamed_addr sanitize_memory { +; CHECK-LABEL: define dso_local void @VarArgCaller( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: store i64 0, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: store i32 0, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 0 +; CHECK-NEXT: [[_MSARG_VA_S:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[_MSARG_VA_O:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_VA_S2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 8 +; CHECK-NEXT: [[_MSARG_VA_O3:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store i32 0, ptr [[_MSARG_VA_S2]], align 8 +; CHECK-NEXT: store i32 0, ptr [[_MSARG_VA_O3]], align 8 +; CHECK-NEXT: store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8 +; CHECK-NEXT: store i32 0, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @VarArgFn(ptr @.str, i32 123) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret void +; entry: %call = tail call i32 (ptr, ...) @VarArgFn(ptr @.str, i32 123) ret void } -; CHECK-LABEL: @VarArgCaller - -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2 -; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4 - -; CHECK: [[PARAM_SI:%[_a-z0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; CHECK: [[ARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[PARAM_SI]] to ptr -; First argument is initialized -; CHECK: store i64 0, ptr [[ARG1_S]] - -; Dangling cast of va_arg_shadow[0], unused because the first argument is fixed. -; CHECK: [[VA_CAST0:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64 - -; CHECK: [[VA_CAST1:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64 -; CHECK: [[ARG1_SI:%[_a-z0-9]+]] = add i64 [[VA_CAST1]], 8 -; CHECK: [[PARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[ARG1_SI]] to ptr - -; Shadow for 123 is 0. -; CHECK: store i32 0, ptr [[ARG1_S]] - -; CHECK: store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]] -; CHECK: call i32 (ptr, ...) @VarArgFn({{.*}} @.str{{.*}} i32 123) +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/TableGen/CPtrWildcard.td b/llvm/test/TableGen/CPtrWildcard.td index 96b51ae..230a673 100644 --- a/llvm/test/TableGen/CPtrWildcard.td +++ b/llvm/test/TableGen/CPtrWildcard.td @@ -5,19 +5,19 @@ // CHECK: static const unsigned char MatcherTable[] = { // CHECK-NEXT: /* 0*/ OPC_CheckOpcode, TARGET_VAL(ISD::INTRINSIC_WO_CHAIN), -// CHECK-NEXT:/* 3*/ OPC_CheckChild0Integer, 42, +// CHECK-NEXT:/* 3*/ OPC_CheckChild0Integer, [[#]], // CHECK-NEXT:/* 5*/ OPC_RecordChild1, // #0 = $src // CHECK-NEXT:/* 6*/ OPC_Scope, 9, /*->17*/ // 2 children in Scope // CHECK-NEXT:/* 8*/ OPC_CheckChild1Type, /*MVT::c64*/126|128,1/*254*/, // CHECK-NEXT:/* 11*/ OPC_MorphNodeTo1None, TARGET_VAL(MyTarget::C64_TO_I64), // CHECK-NEXT: /*MVT::i64*/8, 1/*#Ops*/, 0, -// CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } 21:{ *:[iPTR] }, c64:{ *:[c64] }:$src) - Complexity = 8 +// CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } [[#]]:{ *:[iPTR] }, c64:{ *:[c64] }:$src) - Complexity = 8 // CHECK-NEXT: // Dst: (C64_TO_I64:{ *:[i64] } ?:{ *:[c64] }:$src) // CHECK-NEXT:/* 17*/ /*Scope*/ 9, /*->27*/ // CHECK-NEXT:/* 18*/ OPC_CheckChild1Type, /*MVT::c128*/127|128,1/*255*/, // CHECK-NEXT:/* 21*/ OPC_MorphNodeTo1None, TARGET_VAL(MyTarget::C128_TO_I64), // CHECK-NEXT: /*MVT::i64*/8, 1/*#Ops*/, 0, -// CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } 21:{ *:[iPTR] }, c128:{ *:[c128] }:$src) - Complexity = 8 +// CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } [[#]]:{ *:[iPTR] }, c128:{ *:[c128] }:$src) - Complexity = 8 // CHECK-NEXT: // Dst: (C128_TO_I64:{ *:[i64] } ?:{ *:[c128] }:$src) // CHECK-NEXT:/* 27*/ 0, /*End of Scope*/ // CHECK-NEXT: 0 diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll new file mode 100644 index 0000000..346eaea --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s + +; Make sure that we apply trunc to the edge value of %x. +@g = external global i8 + +define i16 @pr161367(i64 %x) { +; CHECK-LABEL: define i16 @pr161367( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[X]] to i16 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[X]], sub (i64 0, i64 ptrtoint (ptr @g to i64)) +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RET:%.*]] = phi i16 [ trunc (i64 sub (i64 0, i64 ptrtoint (ptr @g to i64)) to i16), %[[ENTRY]] ], [ 0, %[[ELSE]] ] +; CHECK-NEXT: ret i16 [[RET]] +; +entry: + %trunc = trunc i64 %x to i16 + %exitcond = icmp eq i64 %x, sub (i64 0, i64 ptrtoint (ptr @g to i64)) + br i1 %exitcond, label %exit, label %else + +else: + br label %exit + +exit: + %ret = phi i16 [ %trunc, %entry ], [ 0, %else ] + ret i16 %ret +} diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 60a4214..8113ba65 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -1398,5 +1398,73 @@ define void @assume_nonnull(ptr %p) { ret void } +define void @captures_metadata_address_is_null(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_is_null +; FNATTRS-SAME: (ptr captures(address_is_null) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_is_null +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address_is_null"} + ret void +} + +define void @captures_metadata_address(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address +; FNATTRS-SAME: (ptr captures(address) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address"} + ret void +} + +define void @captures_metadata_address_read_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_read_provenance +; FNATTRS-SAME: (ptr captures(address, read_provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_read_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + ret void +} + +define void @captures_metadata_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_provenance +; FNATTRS-SAME: (ptr captures(provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"provenance"} + ret void +} + declare ptr @llvm.launder.invariant.group.p0(ptr) declare ptr @llvm.strip.invariant.group.p0(ptr) diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll index 854357e..edbd602 100644 --- a/llvm/test/Transforms/GlobalOpt/fastcc.ll +++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll @@ -1,16 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=globalopt -S | FileCheck %s declare token @llvm.call.preallocated.setup(i32) declare ptr @llvm.call.preallocated.arg(token, i32) define internal i32 @f(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @f +; CHECK-LABEL: define internal fastcc i32 @f( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal x86_thiscallcc i32 @g(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @g +; CHECK-LABEL: define internal fastcc i32 @g( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } @@ -18,41 +27,80 @@ define internal x86_thiscallcc i32 @g(ptr %m) { ; Leave this one alone, because the user went out of their way to request this ; convention. define internal coldcc i32 @h(ptr %m) { -; CHECK-LABEL: define internal coldcc i32 @h +; CHECK-LABEL: define internal coldcc i32 @h( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @j(ptr %m) { -; CHECK-LABEL: define internal i32 @j +; CHECK-LABEL: define internal i32 @j( +; CHECK-SAME: ptr [[M:%.*]]) { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @inalloca(ptr inalloca(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @inalloca( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define i32 @inalloca2_caller(ptr inalloca(i32) %p) { +; CHECK-LABEL: define i32 @inalloca2_caller( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]]) +; CHECK-NEXT: ret i32 [[RV]] +; %rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p) ret i32 %rv } define internal i32 @inalloca2(ptr inalloca(i32) %p) { ; Because of the musttail caller, this inalloca cannot be dropped. -; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p) +; CHECK-LABEL: define internal i32 @inalloca2( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define internal i32 @preallocated(ptr preallocated(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @preallocated( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define void @call_things() { +; CHECK-LABEL: define void @call_things() local_unnamed_addr { +; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]]) +; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]]) +; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]]) +; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0() +; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]]) +; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]]) +; CHECK-NEXT: ret void +; %m = alloca i32 call i32 @f(ptr %m) call x86_thiscallcc i32 @g(ptr %m) @@ -65,15 +113,25 @@ define void @call_things() { call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)] ret void } -; CHECK-LABEL: define void @call_things() -; CHECK: call fastcc i32 @f -; CHECK: call fastcc i32 @g -; CHECK: call coldcc i32 @h -; CHECK: call i32 @j -; CHECK: call fastcc i32 @inalloca(ptr %args) -; CHECK-NOT: llvm.call.preallocated -; CHECK: call fastcc i32 @preallocated(ptr %paarg) @llvm.used = appending global [1 x ptr] [ - ptr @j + ptr @j ], section "llvm.metadata" + +define internal i32 @assume_fastcc() { +; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) + ret i32 %objsize +} + +define internal i32 @constexpr_self_user() addrspace(1) { +; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) + ret i32 %objsize +} diff --git a/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll b/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll index 4a59e41..cb4e07e 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll @@ -1,10 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=indvars < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @f_sadd(ptr %a) { -; CHECK-LABEL: @f_sadd( +; CHECK-LABEL: define void @f_sadd( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 false, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0:![0-9]+]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; entry: br label %for.body @@ -18,9 +37,6 @@ for.body: ; preds = %entry, %cont store i8 0, ptr %arrayidx, align 1 %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) %1 = extractvalue { i32, i1 } %0, 1 -; CHECK: for.body: -; CHECK-NOT: @llvm.sadd.with.overflow -; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 br i1 %1, label %trap, label %cont, !nosanitize !{} trap: ; preds = %for.body @@ -33,8 +49,71 @@ cont: ; preds = %for.body br i1 %cmp, label %for.body, label %for.cond.cleanup } +define void @f_sadd_overflow(ptr %a) { +; CHECK-LABEL: define void @f_sadd_overflow( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 2147483645, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 2147483647 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 true, label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 2147483645, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + store i8 0, ptr %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp sle i32 %2, 2147483647 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + define void @f_uadd(ptr %a) { -; CHECK-LABEL: @f_uadd( +; CHECK-LABEL: define void @f_uadd( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 false, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; entry: br label %for.body @@ -48,9 +127,6 @@ for.body: ; preds = %entry, %cont store i8 0, ptr %arrayidx, align 1 %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1) %1 = extractvalue { i32, i1 } %0, 1 -; CHECK: for.body: -; CHECK-NOT: @llvm.uadd.with.overflow -; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 br i1 %1, label %trap, label %cont, !nosanitize !{} trap: ; preds = %for.body @@ -63,8 +139,71 @@ cont: ; preds = %for.body br i1 %cmp, label %for.body, label %for.cond.cleanup } +define void @f_uadd_overflow(ptr %a) { +; CHECK-LABEL: define void @f_uadd_overflow( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -6, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 true, label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 4294967290, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + store i8 0, ptr %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp ule i32 %2, 4294967295 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + define void @f_ssub(ptr nocapture %a) { -; CHECK-LABEL: @f_ssub( +; CHECK-LABEL: define void @f_ssub( +; CHECK-SAME: ptr captures(none) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 15, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 false, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV_NEXT]], -1 +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; entry: br label %for.body @@ -78,9 +217,6 @@ for.body: ; preds = %entry, %cont store i8 0, ptr %arrayidx, align 1 %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1) %1 = extractvalue { i32, i1 } %0, 1 -; CHECK: for.body: -; CHECK-NOT: @llvm.ssub.with.overflow.i32 -; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 br i1 %1, label %trap, label %cont, !nosanitize !{} trap: ; preds = %for.body @@ -93,8 +229,76 @@ cont: ; preds = %for.body br i1 %cmp, label %for.body, label %for.cond.cleanup } +; It is theoretically possible to replace the `ssub.with.overflow` with a +; condition on the IV, but SCEV cannot represent non-unsigned-wrapping +; subtraction operations. +define void @f_ssub_overflow(ptr nocapture %a) { +; CHECK-LABEL: define void @f_ssub_overflow( +; CHECK-SAME: ptr captures(none) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -2147483642, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 true, label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ -2147483642, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + store i8 0, ptr %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp sge i32 %2, -2147483648 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + define void @f_usub(ptr nocapture %a) { -; CHECK-LABEL: @f_usub( +; CHECK-LABEL: define void @f_usub( +; CHECK-SAME: ptr captures(none) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 15, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 false, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt i64 [[INDVARS_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; entry: br label %for.body @@ -109,9 +313,6 @@ for.body: ; preds = %entry, %cont %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1) %1 = extractvalue { i32, i1 } %0, 1 -; CHECK: for.body: -; CHECK-NOT: @llvm.usub.with.overflow.i32 -; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 br i1 %1, label %trap, label %cont, !nosanitize !{} trap: ; preds = %for.body @@ -124,8 +325,31 @@ cont: ; preds = %for.body br i1 %cmp, label %for.body, label %for.cond.cleanup } +; It is theoretically possible to replace the `usub.with.overflow` with a +; condition on the IV, but SCEV cannot represent non-unsigned-wrapping +; subtraction operations. define void @f_usub_overflow(ptr nocapture %a) { -; CHECK-LABEL: @f_usub_overflow( +; CHECK-LABEL: define void @f_usub_overflow( +; CHECK-SAME: ptr captures(none) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 15, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]] +; CHECK: [[TRAP]]: +; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]] +; CHECK-NEXT: unreachable, !nosanitize [[META0]] +; CHECK: [[CONT]]: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 true, label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] +; entry: br label %for.body @@ -139,13 +363,6 @@ for.body: ; preds = %entry, %cont store i8 0, ptr %arrayidx, align 1 %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1) %1 = extractvalue { i32, i1 } %0, 1 - -; It is theoretically possible to prove this, but SCEV cannot -; represent non-unsigned-wrapping subtraction operations. - -; CHECK: for.body: -; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1 -; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0 br i1 %1, label %trap, label %cont, !nosanitize !{} trap: ; preds = %for.body @@ -166,3 +383,6 @@ declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone declare void @llvm.trap() #2 +;. +; CHECK: [[META0]] = !{} +;. diff --git a/llvm/test/Transforms/InstCombine/in-freeze-phi.ll b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll new file mode 100644 index 0000000..917d81b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define i32 @phi_freeze_same_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_same_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 42 +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 42, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_mixed_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_mixed_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ 42, %[[CA]] ], [ 7, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 7, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_with_nonconst_incoming(i32 %x, i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_with_nonconst_incoming( +; CHECK-SAME: i32 [[X:%.*]], i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ [[X]], %[[CA]] ], [ 13, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ %x, %cA ], [ 13, %cB ] + ret i32 %phi +} + +define <4 x i8> @phi_freeze_vector(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <4 x i8> @phi_freeze_vector( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret <4 x i8> splat (i8 9) +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <4 x i8> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB + +cA: + br label %final + +cB: + br label %final + +final: + %phi = phi <4 x i8> [ %f, %bb_freeze ], + [<i8 9, i8 9, i8 9, i8 9>, %cA ], + [<i8 9, i8 9, i8 9, i8 9>, %cB ] + ret <4 x i8> %phi +} + +define i32 @multi_use_one_folds_one_not_zero(i1 %c0, i1 %c1, i1 %c2) { +; CHECK-LABEL: define i32 @multi_use_one_folds_one_not_zero( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_OTHER3:.*]], label %[[CC1:.*]] +; CHECK: [[BB_OTHER3]]: +; CHECK-NEXT: br label %[[MID:.*]] +; CHECK: [[CC1]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[MID]]: +; CHECK-NEXT: [[PHI_FOLD:%.*]] = phi i32 [ 0, %[[BB_OTHER3]] ], [ 1, %[[CA]] ], [ 1, %[[CB]] ] +; CHECK-NEXT: br i1 [[C2]], label %[[BB_FREEZE2:.*]], label %[[CD:.*]] +; CHECK: [[BB_FREEZE2]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER2:.*:]] +; CHECK-NEXT: br i1 true, label %[[CA]], label %[[CB]] +; CHECK: [[CC:.*:]] +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CD]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 [[PHI_FOLD]] +; +entry: + %f = freeze i32 undef + br i1 %c0, label %bb_freeze, label %bb_other +bb_freeze: + br label %mid +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %mid +cB: + br label %mid +mid: + %phi_no_fold = phi i32 [ %f, %bb_freeze ], [ 1, %cA ], [ 1, %cB ] + br i1 %c2, label %bb_freeze2, label %cD +bb_freeze2: + br label %final +bb_other2: + br i1 %c1, label %cA, label %cB +cC: + br label %final +cD: + br label %final +final: + %phi_fold = phi i32 [ %f, %bb_freeze2 ], [ 0, %cC ], [ 0, %cD ] + %a = add i32 %phi_fold, %phi_no_fold + ret i32 %a +} + +define i32 @phi_freeze_poison(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_poison( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ poison, %cA ], [ poison, %cB ] + ret i32 %phi +} + +define <2 x i32> @phi_freeze_poison_vec(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <2 x i32> @phi_freeze_poison_vec( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB_FREEZE]] ], [ <i32 poison, i32 1>, %[[CA]] ], [ <i32 poison, i32 1>, %[[CB]] ] +; CHECK-NEXT: ret <2 x i32> [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <2 x i32> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi <2 x i32> [ %f, %bb_freeze ], [ <i32 poison, i32 1>, %cA ], [ <i32 poison, i32 1>, %cB ] + ret <2 x i32> %phi +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index d34ac2b..85c8ed2 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -424,6 +424,174 @@ join: ret ptr %phi } +define void @hoist_captures_same(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_same( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_different(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_different( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_overlap(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_overlap( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address_is_null"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume2(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META11:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_full_set(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_full_set( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one2(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } !2 = !{} @@ -445,4 +613,7 @@ join: ; CHECK: [[META6]] = !{float 2.500000e+00} ; CHECK: [[META7]] = !{i32 5, i32 6} ; CHECK: [[META8]] = !{i32 4, i32 5} +; CHECK: [[META9]] = !{!"address"} +; CHECK: [[META10]] = !{!"address", !"read_provenance"} +; CHECK: [[META11]] = !{!"provenance"} ;. diff --git a/llvm/test/Verifier/captures-metadata.ll b/llvm/test/Verifier/captures-metadata.ll new file mode 100644 index 0000000..ae08ddd --- /dev/null +++ b/llvm/test/Verifier/captures-metadata.ll @@ -0,0 +1,37 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +; CHECK: !captures metadata can only be applied to store instructions +define void @wrong_instr_type(ptr %x) { + load ptr, ptr %x, !captures !{!"address"} + ret void +} + +; CHECK: captures metadata can only be applied to store with value operand of pointer type +define void @wrong_op_type(i32 %x, ptr %y) { + store i32 %x, ptr %y, !captures !{!"address"} + ret void +} + +; CHECK: !captures metadata cannot be empty +define void @empty(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{} + ret void +} + +; CHECK: !captures metadata must be a list of strings +define void @not_string(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!{}} + ret void +} + +; CHECK: invalid entry in !captures metadata +define void @invalid_str(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!"foo"} + ret void +} + +; CHECK: invalid entry in !captures metadata +define void @invalid_none(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!"none"} + ret void +} |