; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s

; Each function below runs a loop that loads a vector of pointers, compares
; every lane against null and OR-reduces the resulting i1 vector. The reduced
; bit is used twice: OR-ed with the induction-variable test to form the loop
; exit condition, and again in %middle.split (as a select or as a branch).
; The first two functions use fixed-width <2 x ...> vectors, the last two use
; scalable <vscale x 2 x ...> vectors.

; Fixed-width vector; the reduction result is turned into the i64 return value
; with a select.
define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) {
; CHECK-LABEL: select_or_reduce_v2i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB0_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr q0, [x0, x8]
; CHECK-NEXT:    cmeq v0.2d, v0.2d, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    tbnz w9, #0, .LBB0_3
; CHECK-NEXT:  // %bb.2: // %vector.body
; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT:    cmp x8, #16
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    b.ne .LBB0_1
; CHECK-NEXT:  .LBB0_3: // %middle.split
; CHECK-NEXT:    and x0, x9, #0x1
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
  %index.next = add nuw i64 %index, 2
  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
  %iv.cmp = icmp eq i64 %index.next, 4
  ; Leave the loop when any lane was null or the index reached 4.
  %exit.cond = or i1 %or.reduc, %iv.cmp
  br i1 %exit.cond, label %middle.split, label %vector.body

middle.split:
  %sel = select i1 %or.reduc, i64 1, i64 0
  ret i64 %sel
}

; Fixed-width vector; the reduction result is branched on after the loop.
; The %found path stores 56 through %p and returns 1, %notfound returns 0.
define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
; CHECK-LABEL: br_or_reduce_v2i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:  .LBB1_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr q0, [x0, x8]
; CHECK-NEXT:    cmeq v0.2d, v0.2d, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    tbnz w9, #0, .LBB1_3
; CHECK-NEXT:  // %bb.2: // %vector.body
; CHECK-NEXT:    // in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT:    cmp x8, #16
; CHECK-NEXT:    add x8, x8, #16
; CHECK-NEXT:    b.ne .LBB1_1
; CHECK-NEXT:  .LBB1_3: // %middle.split
; CHECK-NEXT:    tbz w9, #0, .LBB1_5
; CHECK-NEXT:  // %bb.4: // %found
; CHECK-NEXT:    mov w8, #56 // =0x38
; CHECK-NEXT:    mov w0, #1 // =0x1
; CHECK-NEXT:    str x8, [x1]
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_5:
; CHECK-NEXT:    mov x0, xzr
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
  %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
  %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
  %index.next = add nuw i64 %index, 2
  %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
  %iv.cmp = icmp eq i64 %index.next, 4
  ; Leave the loop when any lane was null or the index reached 4.
  %exit.cond = or i1 %or.reduc, %iv.cmp
  br i1 %exit.cond, label %middle.split, label %vector.body

middle.split:
  br i1 %or.reduc, label %found, label %notfound

found:
  store i64 56, ptr %p, align 8
  ret i64 1

notfound:
  ret i64 0
}

; Scalable vector; the reduction result is turned into the i64 return value
; with a select. The index advances by %vf = vscale * 2 each iteration.
define i64 @select_or_reduce_nxv2i1(ptr nocapture noundef readonly %src) {
; CHECK-LABEL: select_or_reduce_nxv2i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, xzr
; CHECK-NEXT:    neg x10, x8
; CHECK-NEXT:    add x10, x10, #4
; CHECK-NEXT:  .LBB2_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    b.ne .LBB2_3
; CHECK-NEXT:  // %bb.2: // %vector.body
; CHECK-NEXT:    // in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT:    cmp x10, x9
; CHECK-NEXT:    add x9, x9, x8
; CHECK-NEXT:    b.ne .LBB2_1
; CHECK-NEXT:  .LBB2_3: // %middle.split
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %vf = shl nuw nsw i64 %vscale, 1
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
  %index.next = add nuw i64 %index, %vf
  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
  %iv.cmp = icmp eq i64 %index.next, 4
  ; Leave the loop when any lane was null or the index reached 4.
  %exit.cond = or i1 %or.reduc, %iv.cmp
  br i1 %exit.cond, label %middle.split, label %vector.body

middle.split:
  %sel = select i1 %or.reduc, i64 1, i64 0
  ret i64 %sel
}

; Scalable vector; the reduction result is branched on after the loop.
; The %found path stores 56 through %p and returns 1, %notfound returns 0.
define i64 @br_or_reduce_nxv2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
; CHECK-LABEL: br_or_reduce_nxv2i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, xzr
; CHECK-NEXT:    neg x10, x8
; CHECK-NEXT:    add x10, x10, #4
; CHECK-NEXT:  .LBB3_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    b.ne .LBB3_3
; CHECK-NEXT:  // %bb.2: // %vector.body
; CHECK-NEXT:    // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT:    cmp x10, x9
; CHECK-NEXT:    add x9, x9, x8
; CHECK-NEXT:    b.ne .LBB3_1
; CHECK-NEXT:  .LBB3_3: // %middle.split
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    b.eq .LBB3_5
; CHECK-NEXT:  // %bb.4: // %found
; CHECK-NEXT:    mov w8, #56 // =0x38
; CHECK-NEXT:    mov w0, #1 // =0x1
; CHECK-NEXT:    str x8, [x1]
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB3_5:
; CHECK-NEXT:    mov x0, xzr
; CHECK-NEXT:    ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %vf = shl nuw nsw i64 %vscale, 1
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
  %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
  %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
  %index.next = add nuw i64 %index, %vf
  %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
  %iv.cmp = icmp eq i64 %index.next, 4
  ; Leave the loop when any lane was null or the index reached 4.
  %exit.cond = or i1 %or.reduc, %iv.cmp
  br i1 %exit.cond, label %middle.split, label %vector.body

middle.split:
  br i1 %or.reduc, label %found, label %notfound

found:
  store i64 56, ptr %p, align 8
  ret i64 1

notfound:
  ret i64 0
}

declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)