; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilogue=predicate-dont-vectorize -enable-wide-lane-mask -S | FileCheck %s

; Loop-vectorizer test for a thumbv8.1m.main target with the +mve.fp feature
; (see the target triple and attribute group #0 below). The loop hints at the
; end of the file request a vector width of 16 and an interleave count of 2.
;
; The autogenerated assertions expect a predicated vector loop that:
;   * steps the induction variable by 32 (16 lanes x 2 interleaved parts),
;   * builds one <16 x i1> llvm.get.active.lane.mask per interleaved part
;     (based at the index and at index + 16) instead of a single wider mask,
;   * uses llvm.masked.load / llvm.masked.store with those masks, and
;   * branches from the middle block straight to the loop exit.
;
; NOTE(review): presumably the point of passing -enable-wide-lane-mask here is
; to show that it does not change the masks produced for this target; confirm
; against the option's definition in the vectorizer.

target triple = "thumbv8.1m.main-arm-unknown-eabihf"

; Scalar reference loop: when n > 0 (signed compare), for each i from 0 up to
; n, store src[i] * 3 to dst[i]. Elements are i8; %dst is noalias and %src is
; readonly.
define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
; CHECK-LABEL: define void @f0(
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[VAL:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-NEXT:    br i1 [[VAL]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], 31
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 32
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 16
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[N]])
; CHECK-NEXT:    [[ACTIVE_LANE_MASK1:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[TMP0]], i64 [[N]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP1]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK1]], <16 x i8> poison)
; CHECK-NEXT:    [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3)
; CHECK-NEXT:    [[TMP5:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD2]], splat (i8 3)
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
; CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP4]], ptr [[TMP6]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP5]], ptr [[TMP8]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK1]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT:.*]]
; CHECK:       [[SCALAR_PH:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[TMP10]], 3
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i8 [[MUL]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    ret void
;
entry:
  ; Skip the loop entirely unless n is strictly positive (signed).
  %val = icmp sgt i64 %n, 0
  br i1 %val, label %for.body, label %for.end

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; dst[i] = src[i] * 3
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 1
  %mul = mul i8 %0, 3
  %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
  store i8 %mul, ptr %arrayidx3, align 1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
  ; The back edge carries the loop hints defined in !1 below.
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret void
}

; Function attributes for @f0, including the target features string
; "+armv8.1-m.main,+mve.fp".
attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+mve.fp" }

; Loop hints attached to the scalar loop: vector width 16 (!2) and interleave
; count 2 (!3).
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 16}
!3 = !{!"llvm.loop.interleave.count", i32 2}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
; CHECK: [[META4]] = !{!"llvm.loop.vectorize.width", i32 16}
; CHECK: [[META5]] = !{!"llvm.loop.interleave.count", i32 2}
;.