diff options
Diffstat (limited to 'llvm/test/Transforms')
17 files changed, 1058 insertions, 290 deletions
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll index 2b22965..dcfd933 100644 --- a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll +++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll @@ -21,6 +21,28 @@ define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) { ret i16 %trunc } +; Will assert if InsertPoint is not set before creating an instruction +; with IRBuilder +define i16 @selective_shift_16_insertpt(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16_insertpt( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[ADD_ONE:%.*]] = add i16 [[SEL_V]], 1 +; CHECK-NEXT: ret i16 [[ADD_ONE]] +; + %mask.bit = and i32 %mask, 16 + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %sel = lshr i32 %pack, %mask.bit + %add.one = add i32 %sel, 1 + %trunc = trunc i32 %add.one to i16 + ret i16 %trunc +} + define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) { ; CHECK-LABEL: define i16 @selective_shift_16.commute( ; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index f19cca8..a7434a2 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -4,6 +4,10 @@ ; The ptrtoaddr folds are also valid for pointers that have external state. target datalayout = "pe1:64:64:64:32" +declare void @use.i1(i1) +declare void @use.i32(i32) +declare void @use.i64(i64) + ; ptrtoaddr result type is fixed, and can't be combined with integer cast. define i32 @ptrtoaddr_trunc(ptr %p) { ; CHECK-LABEL: define i32 @ptrtoaddr_trunc( @@ -171,3 +175,65 @@ define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offs %sub = sub i128 %p2.addr.ext, %p.int.ext ret i128 %sub } + +; The uses in icmp, ptrtoint, ptrtoaddr should be replaced. The one in the +; return value should not, as the provenance differs. +define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) { +; CHECK-LABEL: define ptr @gep_sub_ptrtoaddr_different_obj( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]], ptr [[P3:%.*]]) { +; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[P2_ADDR]], [[P_ADDR]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUB]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P2]], [[P3]] +; CHECK-NEXT: call void @use.i1(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P2]] to i64 +; CHECK-NEXT: call void @use.i64(i64 [[INT]]) +; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: call void @use.i64(i64 [[ADDR]]) +; CHECK-NEXT: ret ptr [[GEP]] +; + %p.addr = ptrtoaddr ptr %p to i64 + %p2.addr = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %p2.addr, %p.addr + %gep = getelementptr i8, ptr %p, i64 %sub + %cmp = icmp eq ptr %gep, %p3 + call void @use.i1(i1 %cmp) + %int = ptrtoint ptr %gep to i64 + call void @use.i64(i64 %int) + %addr = ptrtoaddr ptr %gep to i64 + call void @use.i64(i64 %addr) + ret ptr %gep +} + +; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should +; not be replaced, as the non-address bits differ. The use in the return value +; should not be replaced as the provenace differs. +define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) { +; CHECK-LABEL: define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]], ptr addrspace(1) [[P3:%.*]]) { +; CHECK-NEXT: [[P_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32 +; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]] +; CHECK-NEXT: call void @use.i1(i1 [[CMP]]) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64 +; CHECK-NEXT: [[INT:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: call void @use.i32(i32 [[INT]]) +; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32 +; CHECK-NEXT: call void @use.i32(i32 [[ADDR]]) +; CHECK-NEXT: ret ptr addrspace(1) [[GEP]] +; + %p.addr = ptrtoaddr ptr addrspace(1) %p to i32 + %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32 + %sub = sub i32 %p2.addr, %p.addr + %gep = getelementptr i8, ptr addrspace(1) %p, i32 %sub + %cmp = icmp eq ptr addrspace(1) %gep, %p3 + call void @use.i1(i1 %cmp) + %int = ptrtoint ptr addrspace(1) %gep to i32 + call void @use.i32(i32 %int) + %addr = ptrtoaddr ptr addrspace(1) %gep to i32 + call void @use.i32(i32 %addr) + ret ptr addrspace(1) %gep +} diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll new file mode 100644 index 0000000..db4965b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll @@ -0,0 +1,417 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=instcombine | FileCheck %s +@A = extern_weak global float, align 4 + +; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true +; and false values, while %v1 and %phi.to.remove are actually the same. +; Fold the selection instruction %same.as.v1 to %v1. +define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; Reduced from select_with_identical_phi(). +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_2(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %x, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi_2() is that the true and false values in +; %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_3(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %v1, float %x + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %x + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi_2() is that the true and false values in +; same.as.v1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_4(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %v1.1 = select i1 %cmp2, float %x, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_5(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_5( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %v1.1 = select i1 %cmp2, float %v1, float %x + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %x + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi_2() is that not all phis are sorted with +; the same order of incoming BBs. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_6(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_6( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ %phi.to.remove.next, %for.body ], [ 0xC415AF1D80000000, %entry ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %x, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; %v1 and %phi.to.remove do not have the same start value. +; Cannot fold %same.as.v1. +define void @select_with_identical_phi_negative_1(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_negative_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]] +; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0x4415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %x, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; %v1 and %phi.to.remove do not act as the same phi since %v1.1 and %phi.to.remove.next do not evolve the same. +; Cannot fold %same.as.v1. +define void @select_with_identical_phi_negative_2(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_negative_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]] +; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %v1, float %x + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; %v1 and %phi.to.remove do not act as the same phi since %v1.1 and %phi.to.remove.next do not +; have the same condition. +; Cannot fold %same.as.v1. +define void @select_with_identical_phi_negative_3(i1 %cmp1, i1 %cmp2, i1 %cmp3, float %x) { +; CHECK-LABEL: @select_with_identical_phi_negative_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP3:%.*]], float [[V1]], float [[X:%.*]] +; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2:%.*]], float [[X]], float [[SAME_AS_V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %v1.1 = select i1 %cmp3, float %v1, float %x + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The true and false values of %same.as.v1 are not really the same phi. +; Cannot fold %same.as.v1. +define void @select_with_identical_phi_negative_4(i1 %cmp1, i1 %cmp2, float %x) { +; CHECK-LABEL: @select_with_identical_phi_negative_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V0]], float [[PHI_TO_REMOVE]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]] +; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]] +; CHECK-NEXT: [[V0_1]] = fadd float [[V0]], 1.000000e+00 +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %same.as.v1 = select i1 %cmp1, float %v0, float %phi.to.remove + %v1.1 = select i1 %cmp2, float %x, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1 + %v0.1 = fadd float %v0, 1.0 + %inc.i = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc.i, 100 + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll index 516f1a2..4128a15 100644 --- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll +++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll @@ -144,6 +144,17 @@ define i24 @wide_source_matching_signbits(i32 %x) { ret i24 %c } +define i32 @wide_source_matching_signbits_has_nsw_flag(i64 %i) { +; CHECK-LABEL: define i32 @wide_source_matching_signbits_has_nsw_flag( +; CHECK-SAME: i64 [[I:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = trunc nsw i64 [[I]] to i32 +; CHECK-NEXT: ret i32 [[A]] +; + %a = trunc nsw i64 %i to i16 + %b = sext i16 %a to i32 + ret i32 %b +} + ; negative test - not enough sign-bits define i24 @wide_source_not_matching_signbits(i32 %x) { diff --git a/llvm/test/Transforms/InstCombine/trunc-sext.ll b/llvm/test/Transforms/InstCombine/trunc-sext.ll new file mode 100644 index 0000000..ac143840 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/trunc-sext.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +target datalayout = "i16:16:16-i32:32:32-i64:64:64-n16:32:64" + +define i32 @test(i64 %i) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i64 [[I:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = trunc i64 [[I]] to i16 +; CHECK-NEXT: [[B:%.*]] = sext i16 [[A]] to i32 +; CHECK-NEXT: ret i32 [[B]] +; + %a = trunc i64 %i to i16 + %b = sext i16 %a to i32 + ret i32 %b +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll index 5a2eee0..2f5a58c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll @@ -37,7 +37,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; ; CHECK-ARMPL-LABEL: define void @sincos_f32( ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-ARMPL: [[ENTRY:.*:]] ; CHECK-ARMPL: [[VECTOR_PH:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY1:.*:]] @@ -51,15 +50,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> [[WIDE_LOAD3:%.*]]) -; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0 -; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1 -; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4 -; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4 ; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]] ; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]] ; CHECK-ARMPL: [[FOR_BODY1:.*:]] @@ -269,7 +259,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; ; CHECK-ARMPL-LABEL: define void @modf_f32( ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] ; CHECK-ARMPL: [[VECTOR_PH:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY1:.*:]] @@ -283,15 +272,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> [[WIDE_LOAD3:%.*]]) -; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0 -; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1 -; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4 -; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4 ; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]] ; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]] ; CHECK-ARMPL: [[FOR_BODY1:.*:]] @@ -429,7 +409,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; ; CHECK-ARMPL-LABEL: define void @sincospi_f32( ; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] ; CHECK-ARMPL: [[VECTOR_PH:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY:.*:]] ; CHECK-ARMPL: [[VECTOR_BODY1:.*:]] @@ -443,15 +422,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4 ; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> [[WIDE_LOAD3:%.*]]) -; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0 -; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1 -; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4 -; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4 ; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]] ; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]] ; CHECK-ARMPL: [[FOR_BODY1:.*:]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index c1f0a35..0d8a102 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -9,49 +9,57 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-LABEL: define void @cost_store_i8( ; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { ; DEFAULT-NEXT: iter.check: -; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 3 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; DEFAULT: vector.main.loop.iter.check: -; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP3]] -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP5]] +; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]] ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4 -; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP23]] -; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1 -; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP24]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 +; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]] +; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1 +; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7]], align 1 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; DEFAULT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; DEFAULT: vec.epilog.iter.check: -; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 +; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]] ; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; DEFAULT: vec.epilog.ph: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8 +; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]] +; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]] ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: -; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]] -; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP19]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 8 -; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 96 -; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]] +; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP9]], align 1 +; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP12]] +; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]] +; DEFAULT-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: -; DEFAULT-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; DEFAULT-NEXT: [[CMP_N6:%.*]] = icmp eq i64 101, [[N_VEC3]] +; DEFAULT-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -70,25 +78,25 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED: vector.ph: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 -; PRED-NEXT: [[TMP9:%.*]] = sub i64 101, [[TMP8]] -; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 101, [[TMP8]] -; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 +; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; PRED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 +; PRED-NEXT: [[TMP4:%.*]] = sub i64 101, [[TMP3]] +; PRED-NEXT: [[TMP5:%.*]] = icmp ugt i64 101, [[TMP3]] +; PRED-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 101) ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP13]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) +; PRED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP7]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) -; PRED-NEXT: [[TMP14:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP14]], true -; PRED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; PRED-NEXT: [[TMP8:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true +; PRED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br label [[LOOP:%.*]] +; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: exit: ; PRED-NEXT: ret void ; @@ -113,33 +121,33 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: iter.check: ; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; DEFAULT: vector.memcheck: -; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000 -; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 -; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] -; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] +; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 1000 +; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] ; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; DEFAULT: vector.main.loop.iter.check: ; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3]], <16 x i16> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4]] to <16 x i8> +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8> ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP4:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TMP4]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP5:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8> -; DEFAULT-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]] -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16 -; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] -; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1, !alias.scope [[META9]], !noalias [[META6]] +; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8> +; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]] +; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] +; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 -; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 +; DEFAULT-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; DEFAULT: vec.epilog.iter.check: @@ -148,20 +156,20 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP15:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8> +; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8> ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: -; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP16:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16]], i64 0 +; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP18:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8> -; DEFAULT-NEXT: [[TMP14:%.*]] = and <8 x i8> [[TMP18]], [[TMP15]] -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]] -; DEFAULT-NEXT: store <8 x i8> [[TMP14]], ptr [[TMP26]], align 1, !alias.scope [[META9]], !noalias [[META6]] -; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 8 -; DEFAULT-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT8]], 1000 -; DEFAULT-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8> +; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]] +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]] +; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]] +; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8 +; DEFAULT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 1000 +; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: @@ -186,35 +194,35 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: entry: ; PRED-NEXT: br label [[VECTOR_MEMCHECK:%.*]] ; PRED: vector.memcheck: -; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000 -; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 -; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] -; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] +; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 1000 +; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] ; PRED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PRED: vector.ph: -; PRED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2 +; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1000) -; PRED-NEXT: [[TMP11:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8> +; PRED-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8> ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[TMP7:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]] -; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0 +; PRED-NEXT: [[TMP3:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]] +; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; PRED-NEXT: [[TMP8:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8> -; PRED-NEXT: [[TMP9:%.*]] = and <vscale x 2 x i8> [[TMP8]], [[TMP11]] -; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP9]], ptr align 1 [[TMP5]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]] +; PRED-NEXT: [[TMP4:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8> +; PRED-NEXT: [[TMP5:%.*]] = and <vscale x 2 x i8> [[TMP4]], [[TMP2]] +; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP5]], ptr align 1 [[TMP6]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000) -; PRED-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true -; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; PRED-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[TMP8:%.*]] = xor i1 [[TMP7]], true +; PRED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br label [[EXIT:%.*]] ; PRED: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index bbc0e33..9c8a187 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -10,8 +10,8 @@ target triple = "aarch64-linux-gnu" ; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16' -; DEBUG: Create Skeleton for epilogue vectorized loop (first pass) -; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 +; DEBUG-NOT: Create Skeleton for epilogue vectorized loop (first pass) +; DEBUG: Executing best plan with VF=vscale x 16, UF=2 ; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16' ; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced. @@ -20,9 +20,7 @@ target triple = "aarch64-linux-gnu" define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-LABEL: @main_vf_vscale_x_16( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] @@ -43,27 +41,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; @@ -150,7 +133,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] @@ -261,13 +244,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 @@ -279,7 +262,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -382,14 +365,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000 @@ -400,7 +383,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000 -; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -513,14 +496,14 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 @@ -528,15 +511,15 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]] -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]] -; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]] +; CHECK-NEXT: store <2 x float> [[TMP17]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -575,8 +558,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4 ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] @@ -611,34 +594,34 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: entry: ; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024 -; CHECK-NEXT: br label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP10]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]] -; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4 -; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]] +; CHECK-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4 +; CHECK-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] @@ -676,8 +659,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4 ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF8-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll index 7bf4fbd..91b3099 100644 --- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll @@ -1,6 +1,5 @@ -; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 +; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s ; Check that we version this loop with speculating the value 1 for symbolic ; strides. This also checks that the symbolic stride information is correctly @@ -8,50 +7,60 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; vectorize because we couldn't determine the array bounds for the required ; memchecks. -; CHECK-LABEL: test -define void @test(ptr %A, i64 %AStride, - ptr %B, i32 %BStride, - ptr %C, i64 %CStride, i32 %N) { +define void @test(ptr noalias %A, i64 %AStride, ptr noalias %B, i32 %BStride, ptr noalias %C, i64 %CStride) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[ASTRIDE:%.*]], ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]], ptr noalias [[C:%.*]], i64 [[CSTRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[IDENT_CHECK1:%.*]] = icmp ne i32 [[BSTRIDE]], 1 +; CHECK-NEXT: [[IDENT_CHECK2:%.*]] = icmp ne i64 [[CSTRIDE]], 1 +; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ne i64 [[ASTRIDE]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = or i1 [[IDENT_CHECK1]], [[IDENT_CHECK2]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK6]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH]]: +; entry: - %cmp13 = icmp eq i32 %N, 0 - br i1 %cmp13, label %for.end, label %for.body.preheader - -; CHECK-DAG: icmp ne i64 %AStride, 1 -; CHECK-DAG: icmp ne i32 %BStride, 1 -; CHECK-DAG: icmp ne i64 %CStride, 1 -; CHECK: or -; CHECK: or -; CHECK: br - -; CHECK: vector.body -; CHECK: load <2 x i32> + br label %loop -for.body.preheader: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %iv.trunc = trunc i64 %indvars.iv to i32 +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %iv.trunc = trunc i64 %iv to i32 %mul = mul i32 %iv.trunc, %BStride %mul64 = zext i32 %mul to i64 - %arrayidx = getelementptr inbounds i32, ptr %B, i64 %mul64 - %0 = load i32, ptr %arrayidx, align 4 - %mul2 = mul nsw i64 %indvars.iv, %CStride - %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %mul2 - %1 = load i32, ptr %arrayidx3, align 4 + %gep.x = getelementptr inbounds i32, ptr %B, i64 %mul64 + %0 = load i32, ptr %gep.x, align 4 + %mul2 = mul nsw i64 %iv, %CStride + %gep.c = getelementptr inbounds i32, ptr %C, i64 %mul2 + %1 = load i32, ptr %gep.c, align 4 %mul4 = mul nsw i32 %1, %0 - %mul3 = mul nsw i64 %indvars.iv, %AStride - %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %mul3 - store i32 %mul4, ptr %arrayidx7, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %N - br i1 %exitcond, label %for.end.loopexit, label %for.body - -for.end.loopexit: - br label %for.end - -for.end: + %mul3 = mul nsw i64 %iv, %AStride + %gep.a = getelementptr inbounds i32, ptr %A, i64 %mul3 + store i32 %mul4, ptr %gep.a, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: ret void } @@ -59,36 +68,150 @@ for.end: ; replacing the symbolic stride '%conv'. ; PR18480 -; CHECK-LABEL: fn1 -; CHECK: load <2 x double> - define void @fn1(ptr noalias %x, ptr noalias %c, double %a) { +; CHECK-LABEL: define void @fn1( +; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[C:%.*]], double [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[A]] to i32 +; CHECK-NEXT: [[CONV2:%.*]] = add i32 [[CONV]], 4 +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV2]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[CONV2]] to i64 +; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[CONV]], 1 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP2]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP4]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; entry: %conv = fptosi double %a to i32 %conv2 = add i32 %conv, 4 %cmp8 = icmp sgt i32 %conv2, 0 - br i1 %cmp8, label %for.body.preheader, label %for.end + br i1 %cmp8, label %loop, label %exit -for.body.preheader: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %0 = trunc i64 %indvars.iv to i32 +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %0 = trunc i64 %iv to i32 %mul = mul nsw i32 %0, %conv - %idxprom = sext i32 %mul to i64 - %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom - %1 = load double, ptr %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds double, ptr %c, i64 %indvars.iv - store double %1, ptr %arrayidx3, align 8 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %conv2 - br i1 %exitcond, label %for.end.loopexit, label %for.body + %mul.ext = sext i32 %mul to i64 + %gep.x = getelementptr inbounds double, ptr %x, i64 %mul.ext + %1 = load double, ptr %gep.x, align 8 + %gep.c = getelementptr inbounds double, ptr %c, i64 %iv + store double %1, ptr %gep.c, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %iv.trunc = trunc i64 %iv.next to i32 + %ec = icmp eq i32 %iv.trunc, %conv2 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +; Make sure we do not crash when the stride is poison. +; Test for https://github.com/llvm/llvm-project/issues/162922. +define void @stride_poison(ptr %dst) mustprogress { +; CHECK-LABEL: define void @stride_poison( +; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 poison, i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[UMAX]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], poison +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 0, ptr %gep.dst, align 1 + %iv.next = add nuw nsw i64 %iv, poison + %ec = icmp samesign ult i64 %iv, 100 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +; Make sure we do not crash when the stride is undef. +define void @stride_undef(ptr %dst) mustprogress { +; CHECK-LABEL: define void @stride_undef( +; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[UMAX]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], undef +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], undef +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], undef +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop -for.end.loopexit: - br label %for.end +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 0, ptr %gep.dst, align 1 + %iv.next = add nuw nsw i64 %iv, undef + %ec = icmp samesign ult i64 %iv, 100 + br i1 %ec, label %loop, label %exit -for.end: +exit: ret void } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll index ff0887c..fbf6323 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll @@ -6,9 +6,9 @@ define void @test(ptr %0, i64 %1, i64 %2) { ; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64> ; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]] ; CHECK: [[_PREHEADER_LR_PH:.*:]] ; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll index bfb623a..6d713e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll @@ -9,11 +9,11 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3 ; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] ; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll new file mode 100644 index 0000000..ec554b4 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s + +define void @test(i32 %arg) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARG]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: br i1 false, label %[[BB8:.*]], label %[[BB4:.*]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]] +; CHECK-NEXT: br label %[[BB8]] +; CHECK: [[BB8]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB4]] ], [ [[TMP1]], %[[BB1]] ] +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: + %sub = sub i32 0, %arg + %add = add i32 0, 0 + %add2 = add i32 0, 0 + %add3 = add i32 0, 0 + br i1 false, label %bb8, label %bb4 + +bb4: + %add5 = add i32 %add3, 0 + %add6 = add i32 0, 0 + %add7 = add i32 0, 0 + br label %bb8 + +bb8: + %phi = phi i32 [ %sub, %bb4 ], [ %sub, %bb1 ] + %phi9 = phi i32 [ %add5, %bb4 ], [ %add, %bb1 ] + %phi10 = phi i32 [ %add6, %bb4 ], [ %add2, %bb1 ] + %phi11 = phi i32 [ %add7, %bb4 ], [ %add3, %bb1 ] + ret void +} + diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll index 7c8cb02..2623366 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll @@ -5,11 +5,11 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128 ; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4 ; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll index ef75a8d..88e2cca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll @@ -10,13 +10,13 @@ define i32 @test(i32 %0, i1 %1) { ; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double> ; CHECK-NEXT: br i1 [[TMP1]], label %[[BB7:.*]], label %[[BB9:.*]] ; CHECK: [[BB7]]: -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) ; CHECK-NEXT: br label %[[BB16:.*]] ; CHECK: [[BB9]]: ; CHECK-NEXT: br i1 false, label %[[BB14:.*]], label %[[BB10:.*]] ; CHECK: [[BB10]]: -; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1> +; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]]) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1> ; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> zeroinitializer) ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[BB14]]: diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll index c2e632d..b1cf0e4 100644 --- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll @@ -61,30 +61,49 @@ return: ret i32 %p } -; Check that switch's of powers of two range is not reduced if default case is reachable +; Check that switch's of powers of two range with the default case reachable is reduced +; w/ Zbb enabled, by jumping non-power-of-two inputs to the default block. define i32 @switch_of_powers_reachable_default(i32 %x) { -; CHECK-LABEL: @switch_of_powers_reachable_default( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1:%.*]] -; CHECK-NEXT: i32 8, label [[BB2:%.*]] -; CHECK-NEXT: i32 16, label [[BB3:%.*]] -; CHECK-NEXT: i32 32, label [[BB4:%.*]] -; CHECK-NEXT: i32 64, label [[BB5:%.*]] -; CHECK-NEXT: ] -; CHECK: bb1: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb2: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb3: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb4: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb5: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[P]] +; RV64I-LABEL: @switch_of_powers_reachable_default( +; RV64I-NEXT: entry: +; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ +; RV64I-NEXT: i32 1, label [[BB1:%.*]] +; RV64I-NEXT: i32 8, label [[BB2:%.*]] +; RV64I-NEXT: i32 16, label [[BB3:%.*]] +; RV64I-NEXT: i32 32, label [[BB4:%.*]] +; RV64I-NEXT: i32 64, label [[BB5:%.*]] +; RV64I-NEXT: ] +; RV64I: bb1: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb2: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb3: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb4: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb5: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: return: +; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ] +; RV64I-NEXT: ret i32 [[P]] +; +; RV64ZBB-LABEL: @switch_of_powers_reachable_default( +; RV64ZBB-NEXT: entry: +; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; RV64ZBB-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[RETURN:%.*]] +; RV64ZBB: entry.split: +; RV64ZBB-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true) +; RV64ZBB-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; RV64ZBB-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]] +; RV64ZBB: switch.lookup: +; RV64ZBB-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i64 0, i64 [[TMP4]] +; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; RV64ZBB-NEXT: br label [[RETURN]] +; RV64ZBB: return: +; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ -1, [[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ] +; RV64ZBB-NEXT: ret i32 [[P]] ; entry: switch i32 %x, label %default_case [ diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll index 49eb199..aa95b3f 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll @@ -34,3 +34,97 @@ return: %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] ret i32 %phi } + +define i32 @switch_of_powers_two_default_reachable(i32 %arg) { +; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_LOOKUP]]: +; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + switch i32 %arg, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + +default_case: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ] + ret i32 %phi +} + +define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) { +; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds( +; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]] +; CHECK: [[SWITCH]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_SPLIT]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8 +; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]] +; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_LOOKUP]]: +; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %cond, label %switch, label %default_case + +switch: + switch i32 %arg, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + +default_case: + %pn = phi i32 [ 0, %entry ], [ %arg, %switch ] + br label %return + +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ] + ret i32 %phi +} diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index 0235732..256d0d9 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -133,19 +133,13 @@ define void @test4(i1 %b, i32 %x) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]] ; CHECK: sw: -; CHECK: i32 0, label [[CASE0:%.*]] -; CHECK-NEXT: i32 1, label [[CASE1:%.*]] -; CHECK-NEXT: i32 2, label [[CASE0]] -; CHECK-NEXT: i32 3, label [[CASE3]] -; CHECK-NEXT: i32 4, label [[DEFAULT:%.*]] -; CHECK-NEXT: ] Edge: [label [[SW]],label %case1], RenamedOp: [[X:%.*]] } -; CHECK-NEXT: [[X_0:%.*]] = bitcast i32 [[X]] to i32 -; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT]] [ -; CHECK-NEXT: i32 0, label [[CASE0]] -; CHECK-NEXT: i32 1, label [[CASE1]] -; CHECK-NEXT: i32 2, label [[CASE0]] -; CHECK-NEXT: i32 3, label [[CASE3]] -; CHECK-NEXT: i32 4, label [[DEFAULT]] +; CHECK: [[X_0:%.*]] = bitcast i32 [[X:%.*]] to i32 +; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE0:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE0]] +; CHECK-NEXT: i32 3, label [[CASE3]] +; CHECK-NEXT: i32 4, label [[DEFAULT]] ; CHECK-NEXT: ] ; CHECK: default: ; CHECK-NEXT: call void @bar(i32 [[X]]) |
