diff options
Diffstat (limited to 'llvm/test/Analysis/BasicAA')
-rw-r--r-- | llvm/test/Analysis/BasicAA/intrinsics.ll | 15 | ||||
-rw-r--r-- | llvm/test/Analysis/BasicAA/scalable-dse-aa.ll | 149 |
2 files changed, 132 insertions, 32 deletions
diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index f8b30df..56d762b 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" @@ -5,12 +6,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; BasicAA should prove that these calls don't interfere, since they are ; IntrArgReadMem and have noalias pointers. -; CHECK: define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[ATTR:#[0-9]+]] -; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m) -; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { +; CHECK-LABEL: define <8 x i16> @test0( +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]]) +; CHECK-NEXT: [[C:%.*]] = add <8 x i16> [[A]], [[A]] +; CHECK-NEXT: ret <8 x i16> [[C]] +; entry: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m) @@ -24,4 +28,3 @@ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) nounwind ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } ; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } -; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll index 7e980c9..ffd8259 100644 --- a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll +++ b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll @@ -1,10 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s define <vscale x 4 x float> @dead_scalable_store(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store( -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -34,9 +47,21 @@ define <vscale x 4 x float> @dead_scalable_store(ptr %0) { define <4 x float> @dead_scalable_store_fixed(ptr %0) { ; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed( -; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.32, ptr nonnull %gep.arr.36, i32 1, <4 x i1> %mask2) -; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) @@ -67,9 +92,25 @@ define <4 x float> @dead_scalable_store_fixed(ptr %0) { define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @scalable_store_partial_overwrite( -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 30 +; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 30 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_30:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_30]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_30]], ptr nonnull [[GEP_ARR_30]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -99,9 +140,23 @@ define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) { define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store_small_mask( -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.46, ptr nonnull %gep.arr.46, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 46 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_46:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 46 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_46:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_46]], ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[SMALLMASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 2) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[SMALLMASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] +; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -131,7 +186,12 @@ define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) { define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store( -; CHECK-NOT: store i32 20, ptr %gep.1.12 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4) +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4) %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12 @@ -144,10 +204,17 @@ define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) { } -; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask( -; CHECK-NOT: store i32 20, ptr %1 -; CHECK: store i32 50, ptr %gep.5 define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) { +; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask( +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7) +; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 5 +; CHECK-NEXT: store i32 50, ptr [[GEP_5]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP0]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: ret <4 x float> [[RETVAL]] +; %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7) store i32 20, ptr %1 @@ -164,8 +231,16 @@ define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) ; This get active lane mask may cover 4 or 8 integers define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @mask_gt_minimum_num_elts( -; CHECK: store i32 10, ptr %gep.1.12 -; CHECK: store i32 20, ptr %gep.1.28 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) +; CHECK-NEXT: [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 +; CHECK-NEXT: store i32 10, ptr [[GEP_1_12]], align 4 +; CHECK-NEXT: [[GEP_1_28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 28 +; CHECK-NEXT: store i32 20, ptr [[GEP_1_28]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12 @@ -182,7 +257,13 @@ define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) { ; Don't do anything if the mask's Op1 < Op0 define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_lt( -; CHECK: store i32 20, ptr %1 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2) +; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2) store i32 20, ptr %1 @@ -196,7 +277,13 @@ define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) { ; Don't do anything if the mask's Op1 == Op0 define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_eq( -; CHECK: store i32 20, ptr %1 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2) +; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2) store i32 20, ptr %1 @@ -209,8 +296,14 @@ define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) { define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 16 x i8> @scalar_stores_small_mask( -; CHECK-NOT: store i8 60, ptr %gep.1.6 -; CHECK: store i8 120, ptr %gep.1.8 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 0, i8 7) +; CHECK-NEXT: [[GEP_1_8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: store i8 120, ptr [[GEP_1_8]], align 1 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[LOAD_0]], ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret <vscale x 16 x i8> [[RETVAL]] ; %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i8.i8(i8 0, i8 7) %gep.1.6 = getelementptr inbounds nuw i8, ptr %1, i64 6 @@ -226,10 +319,14 @@ define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) { define <vscale x 4 x float> @dead_scalar_store_offset(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store_offset( -; CHECK-NOT: store i32 10, ptr %gep.1.0 -; CHECK-NOT: store i32 20, ptr %gep.1.4 -; CHECK-NOT: store i32 30, ptr %gep.1.8 -; CHECK: store i32 40, ptr %gep.1.12 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4) +; CHECK-NEXT: [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 +; CHECK-NEXT: store i32 40, ptr [[GEP_1_12]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4) %gep.1.0 = getelementptr inbounds nuw i8, ptr %1, i64 0 |