; Reviewer overview (comments only; the test text below is unchanged).
; The RUN text below pipes `opt -p loop-vectorize -S` output into FileCheck for
; target triple x86_64-apple-macosx10.15.0. Functions covered up to the TODO note:
;  * @test_free_instructions_feeding_geps_for_interleave_groups: 1024-iteration
;    loop (exits when %iv.next == 1024). Each iteration stores four floats to
;    %dst.1 at indices (%iv << 2) | {0,1,2,3} (two values loaded from %p.invar,
;    then two 0.0) and four floats to %dst.2 at the same indices (one loaded
;    value, then three 0.0). The CHECK lines expect a vector body with two
;    `<8 x float>` stores and an index step of 2.
;  * @geps_feeding_interleave_groups_with_reuse (attribute group #0): loads
;    floats at byte offsets {0,4,8,12} from %arg + (%iv << 5) and from
;    %arg + ((%iv << 5) | 16), adds them pairwise, multiplies by 0.0 and stores
;    to %arg2 + (%iv << 4) + {0,4,8,12}. The loop exits when %iv == %arg1. The
;    CHECK lines expect a min-iteration check, vector.scevcheck and
;    vector.memcheck blocks, one `<16 x float>` wide load and one
;    `<8 x float>` store.
;  * @geps_feeding_interleave_groups_with_reuse2 (attribute group #1): %iv
;    advances by 8 and the loop exits when %iv == %N. Loads i32 from %B at
;    indices %iv >> 1, (%iv | 2) >> 1 and %iv, storing them to %A at %iv,
;    %iv | 2 and %iv | 4; the other five of the eight %A slots get 0. The CHECK
;    lines expect a `<16 x i32>` wide load, a masked gather and a `<32 x i32>`
;    store.
;  * @interleave_store_double_i64: two-iteration loop (exits when %iv == 1) over
;    { double, i64 } elements of %dst; stores i64 %iv to field 1, then double
;    0.0 to field 0. The CHECK lines expect a single `store <4 x double>` in the
;    vector body.
;  * @interleave_store_i64_double: the same two stores in the opposite order
;    (field 0 first). The CHECK lines contain only the scalar loop.
; NOTE(review): several CHECK lines in this copy have no constant operand after
; the vector type (e.g. `shufflevector ..., <4 x i32> ;`, `phi <4 x i64> [ , ...`,
; `store <4 x double> , ptr`). This looks like lost text; regenerate with
; utils/update_test_checks.py rather than editing by hand -- TODO confirm.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p loop-vectorize -S %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.15.0" define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noalias %p.invar, ptr noalias %dst.1, ptr noalias %dst.2) { ; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups( ; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP40]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP41:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[TMP41]] ; CHECK-NEXT: [[TMP42:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT27]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT28]], <4 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x float> [[TMP46]], <4 x float> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP47]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: store <8 x float> 
[[INTERLEAVED_VEC]], ptr [[TMP44]], align 4 ; CHECK-NEXT: [[TMP48:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT29]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP49:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[TMP41]] ; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT30]], <2 x float> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLAT36]], <4 x float> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[IV_MUL:%.*]] = shl i64 [[IV]], 2 ; CHECK-NEXT: [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]] ; CHECK-NEXT: store float [[L_0]], ptr [[GEP_DST_19]], align 4 ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[IV_MUL]], 1 ; CHECK-NEXT: [[GEP_DST_119:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_1]] ; CHECK-NEXT: store float [[L_1]], ptr [[GEP_DST_119]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = or disjoint i64 [[IV_MUL]], 2 ; CHECK-NEXT: [[GEP_DST_129:%.*]] = getelementptr float, ptr 
[[DST_1]], i64 [[ADD_2]] ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_129]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = or disjoint i64 [[IV_MUL]], 3 ; CHECK-NEXT: [[GEP_DST_140:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_3]] ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_140]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[GEP_DST_247:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[IV_MUL]] ; CHECK-NEXT: store float [[L_2]], ptr [[GEP_DST_247]], align 4 ; CHECK-NEXT: [[GEP_DST_255:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_1]] ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_255]], align 4 ; CHECK-NEXT: [[GEP_DST_265:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_2]] ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_265]], align 4 ; CHECK-NEXT: [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]] ; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_276]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %l.0 = load float, ptr %p.invar, align 4 %iv.mul = shl i64 %iv, 2 %gep.dst.19 = getelementptr float, ptr %dst.1, i64 %iv.mul store float %l.0, ptr %gep.dst.19, align 4 %l.1 = load float, ptr %p.invar, align 4 %add.1 = or disjoint i64 %iv.mul, 1 %gep.dst.119 = getelementptr float, ptr %dst.1, i64 %add.1 store float %l.1, ptr %gep.dst.119, align 4 %add.2 = or disjoint i64 %iv.mul, 2 %gep.dst.129 = getelementptr float, ptr %dst.1, i64 %add.2 store float 0.000000e+00, ptr %gep.dst.129, align 4 %add.3 = or disjoint i64 %iv.mul, 3 %gep.dst.140 = getelementptr float, ptr %dst.1, i64 %add.3 store float 0.000000e+00, ptr %gep.dst.140, align 4 %l.2 = load float, ptr %p.invar, align 4 %gep.dst.247 = getelementptr float, ptr 
%dst.2, i64 %iv.mul store float %l.2, ptr %gep.dst.247, align 4 %gep.dst.255 = getelementptr float, ptr %dst.2, i64 %add.1 store float 0.000000e+00, ptr %gep.dst.255, align 4 %gep.dst.265 = getelementptr float, ptr %dst.2, i64 %add.2 store float 0.000000e+00, ptr %gep.dst.265, align 4 %gep.dst.276 = getelementptr float, ptr %dst.2, i64 %add.3 store float 0.000000e+00, ptr %gep.dst.276, align 4 %iv.next = add i64 %iv, 1 %ec = icmp eq i64 %iv.next, 1024 br i1 %ec, label %exit, label %loop exit: ret void } define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr %arg2) #0 { ; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse( ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16 ; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16 ; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[ARG1]], 5 ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 32 ; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, 
ptr [[ARG]], i64 [[TMP23]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP13]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG]], [[SCEVGEP12]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP11]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = fadd <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC17]] ; CHECK-NEXT: [[TMP31:%.*]] = fmul 
<2 x float> [[TMP30]], zeroinitializer ; CHECK-NEXT: [[TMP32:%.*]] = fadd <2 x float> [[STRIDED_VEC14]], [[STRIDED_VEC18]] ; CHECK-NEXT: [[TMP33:%.*]] = fmul <2 x float> [[TMP32]], zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC19]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[SHL_IV_5]] ; CHECK-NEXT: [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16 ; 
CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]] ; CHECK-NEXT: [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[SHL_IV_4]] ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]] ; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00 ; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_3]], align 4 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 4 ; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 4 ; CHECK-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]] ; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 4 ; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_6]], align 4 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 8 ; CHECK-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 8 ; CHECK-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]] ; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 8 ; CHECK-NEXT: store float [[MUL_3]], ptr [[GEP_9]], align 4 ; CHECK-NEXT: [[I27:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 12 ; CHECK-NEXT: [[L_7:%.*]] = load float, ptr [[I27]], align 4 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 12 ; CHECK-NEXT: [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4 ; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]] ; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[ADD_4]], 
0.000000e+00 ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 12 ; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %shl.iv.5 = shl i64 %iv, 5 %gep.1 = getelementptr inbounds i8, ptr %arg, i64 %shl.iv.5 %add.5 = or disjoint i64 %shl.iv.5, 16 %gep.2 = getelementptr i8, ptr %arg, i64 %add.5 %shl.iv.4 = shl i64 %iv, 4 %gep.3 = getelementptr inbounds i8, ptr %arg2, i64 %shl.iv.4 %l.1 = load float, ptr %gep.1, align 4 %l.2 = load float, ptr %gep.2, align 4 %add.1 = fadd float %l.1, %l.2 %mul.1 = fmul float %add.1, 0.000000e+00 store float %mul.1, ptr %gep.3, align 4 %gep.4 = getelementptr inbounds i8, ptr %gep.1, i64 4 %l.3 = load float, ptr %gep.4, align 4 %gep.5 = getelementptr inbounds i8, ptr %gep.2, i64 4 %l.4 = load float, ptr %gep.5, align 4 %add.2 = fadd float %l.3, %l.4 %mul.2 = fmul float %add.2, 0.000000e+00 %gep.6 = getelementptr inbounds i8, ptr %gep.3, i64 4 store float %mul.2, ptr %gep.6, align 4 %gep.7 = getelementptr inbounds i8, ptr %gep.1, i64 8 %l.5 = load float, ptr %gep.7, align 4 %gep.8 = getelementptr inbounds i8, ptr %gep.2, i64 8 %l.6 = load float, ptr %gep.8, align 4 %add.3 = fadd float %l.5, %l.6 %mul.3 = fmul float %add.3, 0.000000e+00 %gep.9 = getelementptr inbounds i8, ptr %gep.3, i64 8 store float %mul.3, ptr %gep.9, align 4 %i27 = getelementptr inbounds i8, ptr %gep.1, i64 12 %l.7 = load float, ptr %i27, align 4 %gep.10 = getelementptr inbounds i8, ptr %gep.2, i64 12 %l.8 = load float, ptr %gep.10, align 4 %add.4 = fadd float %l.7, %l.8 %mul.4 = fmul float %add.4, 0.000000e+00 %gep.11 = getelementptr inbounds i8, ptr %gep.3, i64 12 store float %mul.4, ptr %gep.11, align 4 %iv.next = add i64 
%iv, 1 %ec = icmp eq i64 %iv, %arg1 br i1 %ec, label %exit, label %loop exit: ret void } define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) #1 { ; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse2( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 56 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]] ; CHECK-NEXT: [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]] ; CHECK-NEXT: [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4 ; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]] ; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]] ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; CHECK-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]] ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]] ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12 ; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]] ; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]] ; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20 ; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]] ; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]] ; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24 ; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: 
[[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]] ; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]] ; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28 ; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]] ; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]] ; CHECK-NEXT: [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4 ; CHECK-NEXT: [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1 ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]] ; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]] ; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]] ; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]] ; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]] ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]] ; CHECK-NEXT: [[TMP35:%.*]] = or i1 
[[TMP6]], [[TMP10]] ; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]] ; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]] ; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]] ; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]] ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]] ; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]] ; CHECK-NEXT: [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]] ; CHECK-NEXT: br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5 ; CHECK-NEXT: [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4 ; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]] ; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[TMP43]], 32 ; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP53]] ; CHECK-NEXT: [[TMP46:%.*]] = shl i64 [[TMP42]], 4 ; CHECK-NEXT: [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8 ; CHECK-NEXT: [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] ; CHECK-NEXT: [[BOUND2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND2]] ; CHECK-NEXT: [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] ; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]] ; CHECK-NEXT: [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND132]], [[BOUND133]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i64 4, i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP49]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: br 
label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP51:%.*]] = lshr exact i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[TMP51]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4, !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META15:![0-9]+]], !noalias [[META13]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <8 x i32> [[TMP58]], <8 x i32> [[TMP59]], <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <8 x i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <32 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4, 
!alias.scope [[META13]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[SHR_1:%.*]] = lshr exact i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[SHR_1]] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[L]], ptr [[GEP_A]], align 4 ; CHECK-NEXT: [[IV_NEXT:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_1]], align 4 ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = or disjoint i64 [[IV]], 2 ; CHECK-NEXT: [[SHR_2:%.*]] = lshr exact i64 [[IV_NEXT_1]], 1 ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr i32, ptr [[B]], i64 [[SHR_2]] ; CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr [[GEP_B_2]], align 4 ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_1]] ; CHECK-NEXT: store i32 [[TMP65]], ptr [[GEP_A_2]], align 4 ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = or disjoint i64 [[IV]], 3 ; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_2]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_3]], align 4 ; CHECK-NEXT: [[IV_NEXT_3:%.*]] = or disjoint i64 [[IV]], 4 ; CHECK-NEXT: [[GEP_B_4:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: 
[[TMP66:%.*]] = load i32, ptr [[GEP_B_4]], align 4 ; CHECK-NEXT: [[GEP_A_4:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_3]] ; CHECK-NEXT: store i32 [[TMP66]], ptr [[GEP_A_4]], align 4 ; CHECK-NEXT: [[IV_NEXT_4:%.*]] = or disjoint i64 [[IV]], 5 ; CHECK-NEXT: [[GEP_A_5:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_4]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_5]], align 4 ; CHECK-NEXT: [[IV_NEXT_5:%.*]] = or disjoint i64 [[IV]], 6 ; CHECK-NEXT: [[GEP_A_6:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_5]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_6]], align 4 ; CHECK-NEXT: [[IV_NEXT_6:%.*]] = or disjoint i64 [[IV]], 7 ; CHECK-NEXT: [[GEP_A_7:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_6]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_7]], align 4 ; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next.7, %loop ] %shr.1 = lshr exact i64 %iv, 1 %gep.B = getelementptr nusw i32, ptr %B, i64 %shr.1 %l = load i32, ptr %gep.B, align 4 %gep.A = getelementptr i32, ptr %A, i64 %iv store i32 %l, ptr %gep.A, align 4 %iv.next = or disjoint i64 %iv, 1 %gep.A.1 = getelementptr i32, ptr %A, i64 %iv.next store i32 0, ptr %gep.A.1, align 4 %iv.next.1 = or disjoint i64 %iv, 2 %shr.2 = lshr exact i64 %iv.next.1, 1 %gep.B.2 = getelementptr i32, ptr %B, i64 %shr.2 %1 = load i32, ptr %gep.B.2, align 4 %gep.A.2 = getelementptr i32, ptr %A, i64 %iv.next.1 store i32 %1, ptr %gep.A.2, align 4 %iv.next.2 = or disjoint i64 %iv, 3 %gep.A.3 = getelementptr i32, ptr %A, i64 %iv.next.2 store i32 0, ptr %gep.A.3, align 4 %iv.next.3 = or disjoint i64 %iv, 4 %gep.B.4 = getelementptr i32, ptr %B, i64 %iv %2 = load i32, ptr %gep.B.4, align 4 %gep.A.4 = getelementptr i32, ptr %A, i64 %iv.next.3 store i32 %2, ptr %gep.A.4, align 4 
%iv.next.4 = or disjoint i64 %iv, 5 %gep.A.5 = getelementptr i32, ptr %A, i64 %iv.next.4 store i32 0, ptr %gep.A.5, align 4 %iv.next.5 = or disjoint i64 %iv, 6 %gep.A.6 = getelementptr i32, ptr %A, i64 %iv.next.5 store i32 0, ptr %gep.A.6, align 4 %iv.next.6 = or disjoint i64 %iv, 7 %gep.A.7 = getelementptr i32, ptr %A, i64 %iv.next.6 store i32 0, ptr %gep.A.7, align 4 %iv.next.7 = add nuw nsw i64 %iv, 8 %ec = icmp eq i64 %iv, %N br i1 %ec, label %exit, label %loop exit: ret void } ; Test case for https://github.com/llvm/llvm-project/issues/112922. define void @interleave_store_double_i64(ptr %dst) { ; CHECK-LABEL: define void @interleave_store_double_i64( ; CHECK-SAME: ptr [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1 ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1 store i64 %iv, ptr %gep.1, align 8 %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv store double 0.000000e+00, ptr %gep.0, align 8 %iv.next = add i64 %iv, 1 
%ec = icmp eq i64 %iv, 1 br i1 %ec, label %exit, label %loop exit: ret void } define void @interleave_store_i64_double(ptr %dst) { ; CHECK-LABEL: define void @interleave_store_i64_double( ; CHECK-SAME: ptr [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1 ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv store double 0.000000e+00, ptr %gep.0, align 8 %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1 store i64 %iv, ptr %gep.1, align 8 %iv.next = add i64 %iv, 1 %ec = icmp eq i64 %iv, 1 br i1 %ec, label %exit, label %loop exit: ret void } ; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64. 
; Element type is { i64, double } here. Each iteration first writes 0.0 to the
; double field (index 1) and then %iv to the element base, for %iv = 0 and
; %iv = 1 (the exit test compares %iv against 1). The assertions below contain
; only the original scalar loop, i.e. no vector body is expected.
define void @interleave_store_double_i64_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_double_i64_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same { i64, double } element layout and loop bounds as
; @interleave_store_double_i64_2, with the stores in the opposite order: %iv to
; the element base first, then 0.0 to the double field (index 1).
;
; The assertions below expect a vector body consisting of a single
; <4 x double> store to %dst, with the scalar loop kept after it.
;
; NOTE(review): the constant operand of that vector store was missing from the
; assertion and has been reconstructed as the interleave of the IV lanes 0 and
; 1 reinterpreted as double (field 0) with 0.0 (field 1); the bit pattern of
; i64 1 prints as 4.940660e-324. Confirm by regenerating the assertions with
; utils/update_test_checks.py.
define void @interleave_store_i64_double_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_i64_double_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    store <4 x double> <double 0.000000e+00, double 0.000000e+00, double 4.940660e-324, double 0.000000e+00>, ptr [[DST]], align 8
; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br label %[[EXIT:.*]]
; CHECK:       [[SCALAR_PH:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { "target-features"="+sse4.2" }
attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
; CHECK: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]]}
; CHECK: [[META5]] = distinct !{[[META5]], !"LVerDomain"}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]], [[META5]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
; CHECK: [[META10]] = !{[[META11:![0-9]+]]}
; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]]}
; CHECK: [[META12]] = distinct !{[[META12]], !"LVerDomain"}
; CHECK: [[META13]] = !{[[META14:![0-9]+]]}
; CHECK: [[META14]] = distinct !{[[META14]], [[META12]]}
; CHECK: [[META15]] = !{[[META16:![0-9]+]]}
; CHECK: [[META16]] = distinct !{[[META16]], [[META12]]}
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]]}
;.