Diffstat (limited to 'llvm/test/Analysis/BasicAA')
-rw-r--r--   llvm/test/Analysis/BasicAA/intrinsics.ll        |  4
-rw-r--r--   llvm/test/Analysis/BasicAA/matrix-intrinsics.ll | 30
-rw-r--r--   llvm/test/Analysis/BasicAA/ptr-vector.ll        |  4
-rw-r--r--   llvm/test/Analysis/BasicAA/scalable-dse-aa.ll   | 94
4 files changed, 81 insertions, 51 deletions
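
The CHECK-line churn in the three updated tests tracks a single IR-level change: the masked load/store intrinsics now carry their alignment as an align parameter attribute on the pointer operand instead of a separate i32 argument. A minimal before/after sketch of a masked load, reusing the operands from the intrinsics.ll hunk below:

  ; old form: alignment passed as an explicit i32 operand
  %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt)
  ; new form: alignment carried as an attribute on the pointer operand
  %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr align 16 %p, <8 x i1> %m, <8 x i16> %pt)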
diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll
index 56d762b..52dd65d 100644
--- a/llvm/test/Analysis/BasicAA/intrinsics.ll
+++ b/llvm/test/Analysis/BasicAA/intrinsics.ll
@@ -10,8 +10,8 @@ define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %
 ; CHECK-LABEL: define <8 x i16> @test0(
 ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]])
+; CHECK-NEXT: [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr align 16 [[P]], <8 x i1> [[M]], <8 x i16> [[PT]])
+; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr align 16 [[Q]], <8 x i1> [[M]])
 ; CHECK-NEXT: [[C:%.*]] = add <8 x i16> [[A]], [[A]]
 ; CHECK-NEXT: ret <8 x i16> [[C]]
 ;
diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
new file mode 100644
index 0000000..1de8ab5
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
@@ -0,0 +1,30 @@
+; RUN: opt %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; BasicAA should prove that loads from sufficiently large static offsets
+; don't overlap with matrix loads with a statically known size.
+
+define <8 x double> @non_overlapping_strided_load(ptr %src) {
+; CHECK-LABEL: Function: non_overlapping_strided_load:
+; Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 12
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  ret <8 x double> %l
+}
+
+define <8 x double> @overlapping_strided_load(ptr %src) {
+; CHECK-LABEL: Function: overlapping_strided_load:
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+;
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 11
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  ret <8 x double> %l
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
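
A quick sanity check of the offsets in the two new tests (the arithmetic is derived from the intrinsic operands; it is not part of the test file): a column-major access with 4 rows, 2 columns, and a stride of 8 doubles touches bytes [0, 32) and [64, 96) relative to its base pointer, for a total extent of ((2 - 1) * 8 + 4) * 8 = 96 bytes. The load in @non_overlapping_strided_load begins 12 doubles (96 bytes) past %src, exactly at the end of the store's extent, so the two accesses cannot overlap. The load in @overlapping_strided_load begins at 11 doubles (88 bytes), and its first column, bytes [88, 120), collides with the store's second column, bytes [64, 96).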
diff --git a/llvm/test/Analysis/BasicAA/ptr-vector.ll b/llvm/test/Analysis/BasicAA/ptr-vector.ll
index 7dea24f..598c170 100644
--- a/llvm/test/Analysis/BasicAA/ptr-vector.ll
+++ b/llvm/test/Analysis/BasicAA/ptr-vector.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -print-all-alias-modref-info -passes=aa-eval -disable-output < %s 2>&1 | FileCheck %s
 ; CHECK: MayAlias: i8* %b, i8* %p
-; CHECK: Just Ref: Ptr: i8* %p <-> %v1p = call <1 x ptr> @llvm.masked.load.v1p0.p0(ptr %a, i32 8, <1 x i1> %c, <1 x ptr> poison)
-; CHECK: Just Ref: Ptr: i8* %b <-> %v1p = call <1 x ptr> @llvm.masked.load.v1p0.p0(ptr %a, i32 8, <1 x i1> %c, <1 x ptr> poison)
+; CHECK: Just Ref: Ptr: i8* %p <-> %v1p = call <1 x ptr> @llvm.masked.load.v1p0.p0(ptr align 8 %a, <1 x i1> %c, <1 x ptr> poison)
+; CHECK: Just Ref: Ptr: i8* %b <-> %v1p = call <1 x ptr> @llvm.masked.load.v1p0.p0(ptr align 8 %a, <1 x i1> %c, <1 x ptr> poison)
 define void @test(ptr %a, ptr %b, <1 x i1> %c) {
   %v1p = call <1 x ptr> @llvm.masked.load.v1p0.p0(ptr %a, i32 8, <1 x i1> %c, <1 x ptr> poison)
   %p = bitcast <1 x ptr> %v1p to ptr
diff --git a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
index ffd8259..5cf0ae9 100644
--- a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
+++ b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
@@ -10,12 +10,12 @@ define <vscale x 4 x float> @dead_scalable_store(ptr %0) {
 ; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
 ; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
 ; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
-; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_48]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr align 1 [[GEP_ARR_48]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_48]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
 ; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]]
 ;
@@ -54,12 +54,12 @@ define <4 x float> @dead_scalable_store_fixed(ptr %0) {
 ; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
 ; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
 ; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
-; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]])
-; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]])
-; CHECK-NEXT: [[FADDOP0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
-; CHECK-NEXT: [[FADDOP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_16]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_16]], ptr align 1 [[GEP_ARR_16]], <4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_48]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_48]], ptr align 1 [[GEP_ARR_48]], <4 x i1> [[MASK]])
+; CHECK-NEXT: [[FADDOP0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_ARR_16]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: [[FADDOP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_ARR_48]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
 ; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x float> [[FADDOP0]], [[FADDOP1]]
 ; CHECK-NEXT: ret <4 x float> [[FADD]]
 ;
@@ -101,14 +101,14 @@ define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) {
 ; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
 ; CHECK-NEXT: [[GEP_ARR_30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 30
 ; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
-; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[LOAD_0_30:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_30]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_30]], ptr nonnull [[GEP_ARR_30]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_30:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_30]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_30]], ptr align 1 [[GEP_ARR_30]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_48]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr align 1 [[GEP_ARR_48]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_48]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
 ; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]]
 ;
@@ -147,13 +147,13 @@ define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) {
 ; CHECK-NEXT: [[GEP_0_46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 46
 ; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
 ; CHECK-NEXT: [[GEP_ARR_46:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 46
-; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[LOAD_0_46:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_46]], ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[LOAD_0_46:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_46]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_46]], ptr align 1 [[GEP_ARR_46]], <vscale x 4 x i1> [[MASK]])
 ; CHECK-NEXT: [[SMALLMASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 2)
-; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[SMALLMASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_16]], <vscale x 4 x i1> [[SMALLMASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_ARR_46]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
 ; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]]
 ;
@@ -188,9 +188,9 @@ define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store(
 ; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4)
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]]
 ;
 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4)
@@ -210,9 +210,9 @@ define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1)
 ; CHECK-NEXT: [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7)
 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 5
 ; CHECK-NEXT: store i32 50, ptr [[GEP_5]], align 4
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP0]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP1]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <4 x float> [[RETVAL]]
 ;
 %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7)
@@ -237,9 +237,9 @@ define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) {
 ; CHECK-NEXT: store i32 10, ptr [[GEP_1_12]], align 4
 ; CHECK-NEXT: [[GEP_1_28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 28
 ; CHECK-NEXT: store i32 20, ptr [[GEP_1_28]], align 4
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]]
 ;
 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
@@ -260,9 +260,9 @@ define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) {
 ; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
 ; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]]
 ;
 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
@@ -280,9 +280,9 @@ define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) {
 ; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2)
 ; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]]
 ;
 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2)
@@ -300,9 +300,9 @@ define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) {
 ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 0, i8 7)
 ; CHECK-NEXT: [[GEP_1_8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
 ; CHECK-NEXT: store i8 120, ptr [[GEP_1_8]], align 1
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[LOAD_0]], ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP0]], <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 16 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP1]], <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 16 x i8> [[RETVAL]]
 ;
 %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i8.i8(i8 0, i8 7)
@@ -323,9 +323,9 @@ define <vscale x 4 x float> @dead_scalar_store_offset(ptr noalias %0, ptr %1) {
 ; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4)
 ; CHECK-NEXT: [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; CHECK-NEXT: store i32 40, ptr [[GEP_1_12]], align 4
-; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
-; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]]
 ;
 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4)