Diffstat (limited to 'llvm/test')
89 files changed, 6306 insertions, 647 deletions
diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index f8b30df..56d762b 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" @@ -5,12 +6,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; BasicAA should prove that these calls don't interfere, since they are ; IntrArgReadMem and have noalias pointers. -; CHECK: define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[ATTR:#[0-9]+]] -; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m) -; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { +; CHECK-LABEL: define <8 x i16> @test0( +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]]) +; CHECK-NEXT: [[C:%.*]] = add <8 x i16> [[A]], [[A]] +; CHECK-NEXT: ret <8 x i16> [[C]] +; entry: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m) @@ -24,4 +28,3 @@ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) nounwind ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } ; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } -; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll index 7e980c9..ffd8259 100644 --- a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll +++ b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll @@ -1,10 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s define <vscale x 4 x float> @dead_scalable_store(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store( -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 
+; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -34,9 +47,21 @@ define <vscale x 4 x float> @dead_scalable_store(ptr %0) { define <4 x float> @dead_scalable_store_fixed(ptr %0) { ; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed( -; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.32, ptr nonnull %gep.arr.36, i32 1, <4 x i1> %mask2) -; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]], <4 x 
float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) @@ -67,9 +92,25 @@ define <4 x float> @dead_scalable_store_fixed(ptr %0) { define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @scalable_store_partial_overwrite( -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 30 +; CHECK-NEXT: [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 30 +; CHECK-NEXT: [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_30:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_30]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_30]], ptr nonnull [[GEP_ARR_30]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] ; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -99,9 +140,23 @@ define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) { define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store_small_mask( -; CHECK: call void 
@llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask) -; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask) -; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.46, ptr nonnull %gep.arr.46, i32 1, <vscale x 4 x i1> %mask) +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[ARR:%.*]] = alloca [64 x i32], align 4 +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) +; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +; CHECK-NEXT: [[GEP_0_46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 46 +; CHECK-NEXT: [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16 +; CHECK-NEXT: [[GEP_ARR_46:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 46 +; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[LOAD_0_46:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_46]], ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[SMALLMASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 2) +; CHECK-NEXT: [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[SMALLMASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]] +; CHECK-NEXT: ret <vscale x 4 x float> [[FADD]] +; %arr = alloca [64 x i32], align 4 %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4) @@ -131,7 +186,12 @@ define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) { define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store( -; CHECK-NOT: store i32 20, ptr %gep.1.12 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4) +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4) %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12 @@ -144,10 +204,17 @@ define 
<vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) { } -; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask( -; CHECK-NOT: store i32 20, ptr %1 -; CHECK: store i32 50, ptr %gep.5 define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) { +; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask( +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7) +; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 5 +; CHECK-NEXT: store i32 50, ptr [[GEP_5]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP0]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) +; CHECK-NEXT: ret <4 x float> [[RETVAL]] +; %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7) store i32 20, ptr %1 @@ -164,8 +231,16 @@ define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) ; This get active lane mask may cover 4 or 8 integers define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @mask_gt_minimum_num_elts( -; CHECK: store i32 10, ptr %gep.1.12 -; CHECK: store i32 20, ptr %gep.1.28 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) +; CHECK-NEXT: [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 +; CHECK-NEXT: store i32 10, ptr [[GEP_1_12]], align 4 +; CHECK-NEXT: [[GEP_1_28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 28 +; CHECK-NEXT: store i32 20, ptr [[GEP_1_28]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8) %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12 @@ -182,7 +257,13 @@ define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) { ; Don't do anything if the mask's Op1 < Op0 define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_lt( -; CHECK: store i32 20, ptr %1 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2) +; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> 
[[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2) store i32 20, ptr %1 @@ -196,7 +277,13 @@ define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) { ; Don't do anything if the mask's Op1 == Op0 define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_eq( -; CHECK: store i32 20, ptr %1 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2) +; CHECK-NEXT: store i32 20, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2) store i32 20, ptr %1 @@ -209,8 +296,14 @@ define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) { define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 16 x i8> @scalar_stores_small_mask( -; CHECK-NOT: store i8 60, ptr %gep.1.6 -; CHECK: store i8 120, ptr %gep.1.8 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 0, i8 7) +; CHECK-NEXT: [[GEP_1_8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: store i8 120, ptr [[GEP_1_8]], align 1 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[LOAD_0]], ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret <vscale x 16 x i8> [[RETVAL]] ; %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i8.i8(i8 0, i8 7) %gep.1.6 = getelementptr inbounds nuw i8, ptr %1, i64 6 @@ -226,10 +319,14 @@ define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) { define <vscale x 4 x float> @dead_scalar_store_offset(ptr noalias %0, ptr %1) { ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store_offset( -; CHECK-NOT: store i32 10, ptr %gep.1.0 -; CHECK-NOT: store i32 20, ptr %gep.1.4 -; CHECK-NOT: store i32 30, ptr %gep.1.8 -; CHECK: store i32 40, ptr %gep.1.12 +; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) { +; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4) +; CHECK-NEXT: [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr 
[[TMP1]], i64 12 +; CHECK-NEXT: store i32 40, ptr [[GEP_1_12]], align 4 +; CHECK-NEXT: [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> [[RETVAL]] ; %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4) %gep.1.0 = getelementptr inbounds nuw i8, ptr %1, i64 0 diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll index 3e85760..2c838e2 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll @@ -1,14 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @load_store(ptr %ptrs) { ; CHECK-LABEL: 'load_store' -; CHECK-NEXT: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, ptr undef -; CHECK-NEXT: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, ptr undef -; CHECK-NEXT: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, ptr undef -; CHECK-NEXT: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, ptr undef -; CHECK-NEXT: Invalid cost for instruction: store <vscale x 1 x i128> %load1, ptr %ptrs +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, ptr undef, align 16 +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, ptr undef, align 32 +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, ptr undef, align 16 +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, ptr undef, align 32 +; CHECK-NEXT: Cost Model: Invalid cost for instruction: store <vscale x 1 x i128> %load1, ptr %ptrs, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %load1 = load <vscale x 1 x i128>, ptr undef %load2 = load <vscale x 2 x i128>, ptr undef %load3 = load <vscale x 1 x fp128>, ptr undef @@ -19,8 +22,10 @@ define void @load_store(ptr %ptrs) { define void @masked_load_store(ptr %ptrs, ptr %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) { ; CHECK-LABEL: 'masked_load_store' -; CHECK-NEXT: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) -; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, 
<vscale x 1 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) call void @llvm.masked.store.nxv1i128(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask) ret void @@ -28,8 +33,10 @@ define void @masked_load_store(ptr %ptrs, ptr %val, <vscale x 1 x i1> %mask, <vs define void @masked_gather_scatter(<vscale x 1 x ptr> %ptrs, <vscale x 1 x ptr> %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) { ; CHECK-LABEL: 'masked_gather_scatter' -; CHECK-NEXT: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) -; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) call void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask) ret void diff --git a/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll new file mode 100644 index 0000000..220c5a1 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/ne-guard-multiple-trip-count.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s + +declare void @foo() + +; Tests with multiple guards for the same value and different values. 
+ +define void @test_guard_order_b_then_c_and_d(ptr %a, ptr %b, ptr %c, ptr %d) { +; CHECK-LABEL: 'test_guard_order_b_then_c_and_d' +; CHECK-NEXT: Classifying expressions for: @test_guard_order_b_then_c_and_d +; CHECK-NEXT: %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%a,+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_order_b_then_c_and_d +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.eq.b = icmp ne ptr %a, %b + %cmp.eq.c = icmp ne ptr %a, %c + %cmp.eq.d = icmp ne ptr %b, %d + call void @llvm.assume(i1 %cmp.eq.b) + call void @llvm.assume(i1 %cmp.eq.c) + call void @llvm.assume(i1 %cmp.eq.d) + br label %loop + +loop: + %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] + %iv.next = getelementptr i8, ptr %iv, i64 1 + call void @foo() + %ec = icmp eq ptr %iv.next, %b + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_guard_order_d_then_c_and_b(ptr %a, ptr %b, ptr %c, ptr %d) { +; CHECK-LABEL: 'test_guard_order_d_then_c_and_b' +; CHECK-NEXT: Classifying expressions for: @test_guard_order_d_then_c_and_b +; CHECK-NEXT: %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%a,+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_order_d_then_c_and_b +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.eq.b = icmp ne ptr %a, %b + %cmp.eq.c = icmp ne ptr %a, %c + %cmp.eq.d = icmp ne ptr %b, %d + call void @llvm.assume(i1 %cmp.eq.d) + call void @llvm.assume(i1 %cmp.eq.c) + call void @llvm.assume(i1 %cmp.eq.b) + br label %loop + +loop: + %iv = phi ptr [ %a, %entry ], [ %iv.next, %loop ] + %iv.next = getelementptr i8, ptr %iv, i64 1 + call void @foo() + %ec = icmp eq ptr %iv.next, %b + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll index 7ec674a..dc4a72e 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" @@ -5,12 +6,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; TBAA should prove that these calls don't interfere, since they are ; IntrArgReadMem and have TBAA metadata. -; CHECK: define <8 x i16> @test0(ptr %p, ptr %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[NUW:#[0-9]+]] -; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m) -; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test0(ptr %p, ptr %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { +; CHECK-LABEL: define <8 x i16> @test0( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]], !tbaa [[B_TBAA0:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]]), !tbaa [[A_TBAA3:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add <8 x i16> [[A]], [[A]] +; CHECK-NEXT: ret <8 x i16> [[C]] +; entry: %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind, !tbaa !2 call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m), !tbaa !1 @@ -24,10 +28,16 @@ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) nounwind ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } ; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } -; CHECK: attributes [[NUW]] = { nounwind } !0 = !{!"tbaa root"} !1 = !{!3, !3, i64 0} !2 = !{!4, !4, i64 0} !3 = !{!"A", !0} !4 = !{!"B", !0} +;. +; CHECK: [[B_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"B", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"tbaa root"} +; CHECK: [[A_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK: [[META4]] = !{!"A", [[META2]]} +;. 
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language-version.ll b/llvm/test/Assembler/dicompileunit-invalid-language-version.ll new file mode 100644 index 0000000..b3794ac --- /dev/null +++ b/llvm/test/Assembler/dicompileunit-invalid-language-version.ll @@ -0,0 +1,25 @@ +; RUN: split-file %s %t +; RUN: not llvm-as < %t/dw_lang_with_version.ll -disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-ATTR +; RUN: not llvm-as < %t/overflow.ll -disable-output 2>&1 | FileCheck %s --check-prefix=OVERFLOW +; RUN: not llvm-as < %t/version_without_name.ll -disable-output 2>&1 | FileCheck %s --check-prefix=NO-NAME +; RUN: not llvm-as < %t/negative.ll -disable-output 2>&1 | FileCheck %s --check-prefix=NEGATIVE + +; WRONG-ATTR: error: 'sourceLanguageVersion' requires an associated 'sourceLanguageName' on !DICompileUnit +; OVERFLOW: error: value for 'sourceLanguageVersion' too large, limit is 4294967295 +; NEGATIVE: error: expected unsigned integer +; NO-NAME: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit + +;--- dw_lang_with_version.ll +!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageVersion: 1, + file: !DIFile(filename: "", directory: "")) + +;--- overflow.ll +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 4294967298) + +;--- negative.ll +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: -1, + file: !DIFile(filename: "", directory: "")) + +;--- version_without_name.ll +!0 = distinct !DICompileUnit(sourceLanguageVersion: 1, + file: !DIFile(filename: "", directory: "")) diff --git a/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc b/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc Binary files differnew file mode 100644 index 0000000..461a34d0 --- /dev/null +++ b/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc diff --git a/llvm/test/Bitcode/dwarf-source-language-version.ll b/llvm/test/Bitcode/dwarf-source-language-version.ll new file mode 100644 index 0000000..311afd5 --- /dev/null +++ b/llvm/test/Bitcode/dwarf-source-language-version.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s --implicit-check-not "sourceLanguageVersion: 0" + +; CHECK: sourceLanguageVersion: 120 + +source_filename = "cu.cpp" +target triple = "arm64-apple-macosx" + +!llvm.dbg.cu = !{!0, !5} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, sourceLanguageVersion: 120, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!1 = !DIFile(filename: "cu.cpp", directory: "/tmp") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, sourceLanguageVersion: 0, file: !6, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!6 = !DIFile(filename: "cu2.cpp", directory: "/tmp") diff --git a/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test b/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test new file mode 100644 index 0000000..9475f9b --- /dev/null +++ b/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test @@ -0,0 +1,21 @@ +; Test loading metadata which was 
not aware of versioned language names. +; +; RUN: llvm-dis -o - %p/Inputs/compile-unit-no-versioned-language.bc \ +; RUN: | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion" + +; Input bitcode file was compiled from following source on +; LLVM commit `fc22b58c25963ece6b041cadbdc931c2338955e4`: +; +; source_filename = "cu.cpp" +; target triple = "arm64-apple-macosx" +; +; !llvm.dbg.cu = !{!0} +; !llvm.module.flags = !{!3, !4} +; +; !0 = distinct !DICompileUnit(language: DW_LANG_ObjC, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +; !1 = !DIFile(filename: "cu.cpp", directory: "/tmp") +; !2 = !{} +; !3 = !{i32 7, !"Dwarf Version", i32 5} +; !4 = !{i32 2, !"Debug Info Version", i32 3} + +; CHECK: distinct !DICompileUnit(language: DW_LANG_ObjC, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir index cc75774..c2bf95c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir @@ -15,8 +15,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C1]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C1]], [[C2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY2]], [[C2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[PTR_ADD]](p0) :: (store (s32)) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s16>), [[C1]](s64) @@ -91,7 +92,8 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) - ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[C3]], [[C2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C3]](s64) + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[COPY3]], [[C2]] ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL1]](s64) ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[PTR_ADD1]](p0) :: (store (s32)) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s16>), [[C3]](s64) diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll index 649d0a9..e7e9ee7 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll @@ -1,41 +1,54 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s -o -| FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smmla.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smmla v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ret entry: -; CHECK-LABEL: smmla.v4i32.v16i8 -; CHECK: 
smmla v0.4s, v1.16b, v2.16b %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) ret <4 x i32> %vmmla1.i } define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: ummla.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ummla v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ret entry: -; CHECK-LABEL: ummla.v4i32.v16i8 -; CHECK: ummla v0.4s, v1.16b, v2.16b %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) ret <4 x i32> %vmmla1.i } define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: usmmla.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: usmmla v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usmmla.v4i32.v16i8 -; CHECK: usmmla v0.4s, v1.16b, v2.16b %vusmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3 ret <4 x i32> %vusmmla1.i } define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: usdot.v2i32.v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: usdot v0.2s, v1.8b, v2.8b +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot.v2i32.v8i8 -; CHECK: usdot v0.2s, v1.8b, v2.8b %vusdot1.i = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) ret <2 x i32> %vusdot1.i } define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: usdot_lane.v2i32.v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot_lane.v2i32.v8i8 -; CHECK: usdot v0.2s, v1.8b, v2.4b[0] %0 = bitcast <8 x i8> %b to <2 x i32> %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer %1 = bitcast <2 x i32> %shuffle to <8 x i8> @@ -44,9 +57,12 @@ entry: } define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: sudot_lane.v2i32.v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: sudot_lane.v2i32.v8i8 -; CHECK: sudot v0.2s, v1.8b, v2.4b[0] %0 = bitcast <8 x i8> %b to <2 x i32> %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer %1 = bitcast <2 x i32> %shuffle to <8 x i8> @@ -55,9 +71,11 @@ entry: } define <2 x i32> @usdot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: usdot_lane.v2i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot_lane.v2i32.v16i8 -; CHECK: usdot v0.2s, v1.8b, v2.4b[0] %0 = bitcast <16 x i8> %b to <4 x i32> %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer %1 = bitcast <2 x i32> %shuffle to <8 x i8> @@ -66,9 +84,11 @@ entry: } define <2 x i32> @sudot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: sudot_lane.v2i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: sudot_lane.v2i32.v16i8 -; CHECK: sudot v0.2s, v1.8b, v2.4b[0] %0 = bitcast <16 x i8> %b to <4 x i32> %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer %1 = bitcast <2 x i32> %shuffle to <8 x i8> @@ -77,17 +97,22 @@ entry: } define <4 x i32> @usdot.v4i32.v16i8(<4 x 
i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: usdot.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: usdot v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot.v4i32.v16i8 -; CHECK: usdot v0.4s, v1.16b, v2.16b %vusdot1.i = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3 ret <4 x i32> %vusdot1.i } define <4 x i32> @usdot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: usdot_lane.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot_lane.v4i32.v16i8 -; CHECK: usdot v0.4s, v1.16b, v2.4b[0] %0 = bitcast <8 x i8> %b to <2 x i32> %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer %1 = bitcast <4 x i32> %shuffle to <16 x i8> @@ -96,9 +121,12 @@ entry: } define <4 x i32> @sudot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: sudot_lane.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: sudot_lane.v4i32.v16i8 -; CHECK: sudot v0.4s, v1.16b, v2.4b[0] %0 = bitcast <8 x i8> %b to <2 x i32> %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer %1 = bitcast <4 x i32> %shuffle to <16 x i8> @@ -107,9 +135,11 @@ entry: } define <4 x i32> @usdot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: usdot_laneq.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: usdot_laneq.v4i32.v16i8 -; CHECK: usdot v0.4s, v1.16b, v2.4b[0] %0 = bitcast <16 x i8> %b to <4 x i32> %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer %1 = bitcast <4 x i32> %shuffle to <16 x i8> @@ -118,9 +148,11 @@ entry: } define <4 x i32> @sudot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: sudot_laneq.v4i32.v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0] +; CHECK-NEXT: ret entry: -; CHECK-LABEL: sudot_laneq.v4i32.v16i8 -; CHECK: sudot v0.4s, v1.16b, v2.4b[0] %0 = bitcast <16 x i8> %b to <4 x i32> %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer %1 = bitcast <4 x i32> %shuffle to <16 x i8> @@ -133,4 +165,3 @@ declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2 declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2 declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2 - diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll index 15ee6a0..36655f6 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll @@ -359,12 +359,177 @@ define float @fadd_reduct_reassoc_v4v8f32(<vscale x 4 x float> %a, <vscale x 8 x ret float %r } +; No FMULV instruction so use knowledge about the architectural maximum size of +; an SVE register to "scalarise" the reduction. 
+ +define half @fmulv_nxv2f16(half %init, <vscale x 2 x half> %a) { +; CHECK-LABEL: fmulv_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.h, #1.00000000 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: fmul h0, h0, h1 +; CHECK-NEXT: ret + %res = call fast half @llvm.vector.reduce.fmul.nxv2f16(half %init, <vscale x 2 x half> %a) + ret half %res +} + +define half @fmulv_nxv4f16(half %init, <vscale x 4 x half> %a) { +; CHECK-LABEL: fmulv_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.h, #1.00000000 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: fmul h0, h0, h1 +; CHECK-NEXT: ret + %res = call fast half @llvm.vector.reduce.fmul.nxv4f16(half %init, <vscale x 4 x half> %a) + ret half %res +} + +define half @fmulv_nxv8f16(half %init, <vscale x 8 x half> %a) { +; CHECK-LABEL: fmulv_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.h, #1.00000000 +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: uzp2 z3.h, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: fmul z1.h, z1.h, z3.h +; CHECK-NEXT: fmul h0, h0, h1 +; CHECK-NEXT: ret + %res = call fast half @llvm.vector.reduce.fmul.nxv8f16(half %init, <vscale x 8 x half> %a) + ret half %res +} + +define float @fmulv_nxv2f32(float %init, <vscale x 2 x float> %a) { +; CHECK-LABEL: fmulv_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.s, #1.00000000 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 
z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: fmul s0, s0, s1 +; CHECK-NEXT: ret + %res = call fast float @llvm.vector.reduce.fmul.nxv2f32(float %init, <vscale x 2 x float> %a) + ret float %res +} + +define float @fmulv_nxv4f32(float %init, <vscale x 4 x float> %a) { +; CHECK-LABEL: fmulv_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.s, #1.00000000 +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: uzp2 z3.s, z1.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: fmul z1.s, z1.s, z3.s +; CHECK-NEXT: fmul s0, s0, s1 +; CHECK-NEXT: ret + %res = call fast float @llvm.vector.reduce.fmul.nxv4f32(float %init, <vscale x 4 x float> %a) + ret float %res +} + +define double @fmulv_nxv2f64(double %init, <vscale x 2 x double> %a) { +; CHECK-LABEL: fmulv_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z2.d, #1.00000000 +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.d, z1.d, z3.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.d, z1.d, z3.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.d, z1.d, z3.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.d, z1.d, z3.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z2.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z2.d +; CHECK-NEXT: fmul z1.d, z1.d, z3.d +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret + %res = call fast double @llvm.vector.reduce.fmul.nxv2f64(double %init, <vscale x 2 x double> %a) + ret double %res +} + declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>) declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>) declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>) -declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>) -declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>) -declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>) declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>) declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>) declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>) @@ -397,3 +562,10 @@ declare half @llvm.vector.reduce.fminimum.nxv8f16(<vscale x 8 x half>) declare float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float>) declare float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float>) declare double @llvm.vector.reduce.fminimum.nxv2f64(<vscale x 2 x double>) + +declare half @llvm.vector.reduce.fmul.nxv2f16(half, <vscale x 2 x half>) +declare half @llvm.vector.reduce.fmul.nxv4f16(half, <vscale x 4 x half>) +declare half @llvm.vector.reduce.fmul.nxv8f16(half, <vscale x 8 x half>) 
+declare float @llvm.vector.reduce.fmul.nxv2f32(float, <vscale x 2 x float>) +declare float @llvm.vector.reduce.fmul.nxv4f32(float, <vscale x 4 x float>) +declare double @llvm.vector.reduce.fmul.nxv2f64(double, <vscale x 2 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll index be936f0..6fb0315 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll @@ -369,6 +369,131 @@ define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) { ret i64 %res } +; No MULV instruction so use knowledge about the architectural maximum size of +; an SVE register to "scalarise" the reduction. + +define i8 @mulv_nxv16i8(<vscale x 16 x i8> %a) { +; CHECK-LABEL: mulv_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #1 // =0x1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: uzp2 z2.b, z0.b, z1.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %res = call i8 @llvm.vector.reduce.mul.nxv16i8(<vscale x 16 x i8> %a) + ret i8 %res +} + +define i16 @mulv_nxv8i16(<vscale x 8 x i16> %a) { +; CHECK-LABEL: mulv_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: uzp2 z2.h, z0.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %res = call i16 @llvm.vector.reduce.mul.nxv8i16(<vscale x 8 x i16> %a) + ret i16 %res +} + +define i32 @mulv_nxv4i32(<vscale x 4 x i32> %a) { +; CHECK-LABEL: mulv_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: 
uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: uzp2 z2.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %res = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %a) + ret i32 %res +} + +define i64 @mulv_nxv2i64(<vscale x 2 x i64> %a) { +; CHECK-LABEL: mulv_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uzp2 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: uzp2 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: uzp2 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: uzp2 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: uzp2 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret + %res = call i64 @llvm.vector.reduce.mul.nxv2i64(<vscale x 2 x i64> %a) + ret i64 %res +} + ; Test widen vector reduce type declare i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll index 26b9d99..8705647 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll @@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 { ; global nnan function attribute always forces clamp combine -define float @test_min_max_global_nnan(float %a) #3 { +define float @test_min_max_global_nnan(float %a) { ; GFX10-LABEL: test_min_max_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -223,11 +223,11 @@ define float @test_min_max_global_nnan(float %a) #3 { ; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp ; GFX12-NEXT: s_setpc_b64 s[30:31] %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0) - %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0) + %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0) ret float %fmed } -define float @test_max_min_global_nnan(float %a) #3 { +define float @test_max_min_global_nnan(float %a) { ; GFX10-LABEL: test_max_min_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -244,7 +244,7 @@ define float @test_max_min_global_nnan(float %a) #3 { ; GFX12-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp ; GFX12-NEXT: s_setpc_b64 s[30:31] %minnum = call float @llvm.minnum.f32(float %a, float 1.0) - %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0) + %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0) ret float %fmed } @@ -414,5 +414,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) attributes #0 = {"amdgpu-ieee"="true"} attributes #1 = {"amdgpu-ieee"="false"} attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"} -attributes #3 = {"no-nans-fp-math"="true"} attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll index d2c93e7..696a87b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll @@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 { ; global nnan function attribute always forces fmed3 combine -define float @test_min_max_global_nnan(float %a) #2 { +define float @test_min_max_global_nnan(float %a) { ; GFX10-LABEL: test_min_max_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -254,12 +254,12 @@ define float @test_min_max_global_nnan(float %a) #2 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] - %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) + %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) ret float %fmed } -define float @test_max_min_global_nnan(float %a) #2 { +define float @test_max_min_global_nnan(float %a) { ; GFX10-LABEL: test_max_min_global_nnan: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -281,8 +281,8 @@ define float @test_max_min_global_nnan(float %a) #2 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] - %minnum = call float @llvm.minnum.f32(float %a, float 4.0) - %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) + %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0) + %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) ret float %fmed } @@ -560,4 +560,3 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) attributes #0 = {"amdgpu-ieee"="true"} attributes #1 = {"amdgpu-ieee"="false"} -attributes #2 = {"no-nans-fp-math"="true"} diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index 9e15225..3145a27 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -10,7 +10,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s -define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o ret void } -define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt ret void } -define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void 
@v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -342,7 +342,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -453,7 +453,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -569,7 +569,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp ret void } -define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -740,7 +740,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1 ret void } -define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -955,14 +955,14 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid %a = load float, ptr addrspace(1) %gep0 - %max = call float @llvm.maxnum.f32(float %a, float 2.0) - %med = call float @llvm.minnum.f32(float %max, float 4.0) + %max = call nnan float @llvm.maxnum.f32(float %a, float 2.0) + %med = call nnan float @llvm.minnum.f32(float %max, float 4.0) store float %med, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -1297,10 +1297,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a - %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b) + %tmp2 = call nnan 
float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -1487,10 +1487,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 %b.fneg = fsub float -0.0, %b - %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b.fneg) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b.fneg) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -1677,10 +1677,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 %c.fneg = fsub float -0.0, %c - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fneg) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -1872,14 +1872,14 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a - %b.fabs = call float @llvm.fabs.f32(float %b) - %c.fabs = call float @llvm.fabs.f32(float %c) + %b.fabs = call nnan float @llvm.fabs.f32(float %b) + %c.fabs = call nnan float @llvm.fabs.f32(float %c) %c.fabs.fneg = fsub float -0.0, %c.fabs - %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs) - %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b.fabs) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b.fabs) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void @@ -2082,16 +2082,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs %c.fabs = call float @llvm.fabs.f32(float %c) %c.fabs.fneg = fsub float -0.0, %c.fabs - %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) - %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) + %med3 = 
call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt ret void } -define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -2418,7 +2418,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou ret void } -define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr ret void } -define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -2878,10 +2878,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3030,10 +3030,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float 
@llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3220,10 +3220,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a - %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b) - %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b) - %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b) + %tmp1 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b) + %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3372,10 +3372,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3524,10 +3524,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3676,10 +3676,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3828,10 +3828,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr 
addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -3980,10 +3980,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4132,10 +4132,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4284,10 +4284,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4436,10 +4436,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float 
@llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4588,10 +4588,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4740,10 +4740,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -4892,10 +4892,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -5044,10 +5044,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan 
float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -5196,10 +5196,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -5348,10 +5348,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.minnum.f32(float %b, float %a) - %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) - %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) - %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) + %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a) + %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a) + %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0) store float %med3, ptr addrspace(1) %outgep ret void } @@ -5503,10 +5503,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) % %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %tmp0 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp1 = call float @llvm.minnum.f32(float %a, float %b) - %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp1 = call nnan float @llvm.minnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -5515,7 +5515,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) % ; Negative patterns ; --------------------------------------------------------------------- -define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -5717,7 +5717,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr 
addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -5944,7 +5944,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -6146,7 +6146,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -6352,7 +6352,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr ret void } -define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -6527,7 +6527,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -6702,7 +6702,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -6877,7 +6877,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) ret void } -define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: 
s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -7270,10 +7270,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a - %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) - %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) - %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b) + %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) + %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %med3, ptr addrspace(1) %outgep ret void } @@ -7428,13 +7428,13 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out %a = load volatile float, ptr addrspace(1) %gep0 %b = load volatile float, ptr addrspace(1) %gep1 %c = load volatile float, ptr addrspace(1) %gep2 - %max = call float @llvm.maxnum.f32(float %a, float %b) - %minmax = call float @llvm.minnum.f32(float %max, float %c) + %max = call nnan float @llvm.maxnum.f32(float %a, float %b) + %minmax = call nnan float @llvm.minnum.f32(float %max, float %c) store float %minmax, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -7597,7 +7597,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o ret void } -define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { ; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 @@ -7865,7 +7865,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt ret void } -define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: two_non_inline_constant: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -7998,7 +7998,7 @@ define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr ad } ; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants. 
-define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: one_non_inline_constant: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -8137,7 +8137,7 @@ define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr ad ret void } -define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { +define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) { ; SI-SDAG-LABEL: two_non_inline_constant_multi_use: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 @@ -8343,7 +8343,7 @@ define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %o ret void } -define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 { +define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) { ; SI-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8384,7 +8384,7 @@ define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 { ret float %med } -define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) #1 { +define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) { ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8452,7 +8452,7 @@ define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> % ret <2 x float> %med } -define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) #1 { +define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) { ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8525,7 +8525,7 @@ define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use( ret { float, float } %ins.1 } -define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 { +define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) { ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8567,7 +8567,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) ret float %med } -define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) #1 { +define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) { ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8609,7 +8609,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) ret float %med } -define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 { +define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) { ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8651,7 +8651,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 { ret float %med } -define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 { +define float 
@v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) { ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8693,7 +8693,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 { ret float %med } -define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 { +define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) { ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8772,7 +8772,7 @@ define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 { ret half %med } -define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) #1 { +define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) { ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8848,7 +8848,7 @@ define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) ret <2 x half> %med } -define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) #1 { +define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) { ; SI-LABEL: v_test_fmed3_r_i_i_f64_minimumnum_maximumnum: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8905,5 +8905,4 @@ declare half @llvm.minnum.f16(half, half) #0 declare half @llvm.maxnum.f16(half, half) #0 attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" } attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index 6b09424..eee232a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -49,7 +49,6 @@ bb: ret void } -; FIXME: This generates "instid1(/* invalid instid value */)". 
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) { ; GFX11-LABEL: f2: ; GFX11: ; %bb.0: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll index 56f9c5d..d578d2e 100644 --- a/llvm/test/CodeGen/AMDGPU/minmax.ll +++ b/llvm/test/CodeGen/AMDGPU/minmax.ll @@ -612,10 +612,10 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) ; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4 ; GFX1250-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-NEXT: s_set_pc_i64 s[30:31] - %tmp0 = call float @llvm.minnum.f32(float %x, float %y) - %tmp1 = call float @llvm.maxnum.f32(float %x, float %y) - %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z) - %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minnum.f32(float %x, float %y) + %tmp1 = call nnan float @llvm.maxnum.f32(float %x, float %y) + %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %z) + %tmp3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) store float %tmp3, ptr addrspace(1) %arg ret void } @@ -646,10 +646,10 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x ; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4 ; GFX1250-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-NEXT: s_set_pc_i64 s[30:31] - %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y) - %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y) - %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z) - %tmp3 = call float @llvm.maximumnum.f32(float %tmp0, float %tmp2) + %tmp0 = call nnan float @llvm.minimumnum.f32(float %x, float %y) + %tmp1 = call nnan float @llvm.maximumnum.f32(float %x, float %y) + %tmp2 = call nnan float @llvm.minimumnum.f32(float %tmp1, float %z) + %tmp3 = call nnan float @llvm.maximumnum.f32(float %tmp0, float %tmp2) store float %tmp3, ptr addrspace(1) %arg ret void } @@ -1280,10 +1280,10 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 ; GISEL-GFX1250-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4 ; GISEL-GFX1250-FAKE16-NEXT: global_store_b16 v[0:1], v2, off ; GISEL-GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31] - %tmp0 = call half @llvm.minnum.f16(half %x, half %y) - %tmp1 = call half @llvm.maxnum.f16(half %x, half %y) - %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z) - %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2) + %tmp0 = call nnan half @llvm.minnum.f16(half %x, half %y) + %tmp1 = call nnan half @llvm.maxnum.f16(half %x, half %y) + %tmp2 = call nnan half @llvm.minnum.f16(half %tmp1, half %z) + %tmp3 = call nnan half @llvm.maxnum.f16(half %tmp0, half %tmp2) store half %tmp3, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll index a2d6ca9..972a470 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll @@ -27,7 +27,7 @@ entry: !1 = !{i64 0, !"_ZTSFivE.generalized"} !2 = !{i64 0, !"_ZTSFviE.generalized"} -; CHECK: .section .callgraph,"o",%progbits,.text +; CHECK: .section .llvm.callgraph,"o",%progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. 
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll index bf5249e..ec8d5b8 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll @@ -1,8 +1,8 @@ ;; Test if temporary labels are generated for each indirect callsite. -;; Test if the .callgraph section contains the MD5 hash of callees' type (type id) +;; Test if the .llvm.callgraph section contains the MD5 hash of callees' type (type id) ;; is correctly paired with its corresponding temporary label generated for indirect ;; call sites annotated with !callee_type metadata. -;; Test if the .callgraph section contains unique direct callees. +;; Test if the .llvm.callgraph section contains unique direct callees. ; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s @@ -36,7 +36,7 @@ entry: !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} -; CHECK: .section .callgraph,"o",%progbits,.text +; CHECK: .section .llvm.callgraph,"o",%progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll index d577603..8036004 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll @@ -1,7 +1,7 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls. ; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { entry: @@ -27,7 +27,7 @@ declare !type !2 i32 @bar(i8 signext) !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{i64 0, !"_ZTSFiiE.generalized"} -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154 ; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8 ;; Verify that the type id 0x308e4b8159bc8654 is in section. diff --git a/llvm/test/CodeGen/ARM/call-graph-section.ll b/llvm/test/CodeGen/ARM/call-graph-section.ll index 928a1067..167cc6f 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section.ll @@ -1,7 +1,7 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file. 
; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s declare !type !0 void @foo() @@ -31,7 +31,7 @@ entry: ;; Make sure following type IDs are in call graph section ;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814 -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000324 ; CHECK-NEXT: 0x00000010 44f731f5 eecb3e54 86bc5981 4b8e307a ; CHECK-NEXT: 0x00000020 de6814f8 97fd77 diff --git a/llvm/test/CodeGen/ARM/nnan-fsub.ll b/llvm/test/CodeGen/ARM/nnan-fsub.ll index 0183908..78dd36f 100644 --- a/llvm/test/CodeGen/ARM/nnan-fsub.ll +++ b/llvm/test/CodeGen/ARM/nnan-fsub.ll @@ -1,18 +1,22 @@ -; RUN: llc -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s -; RUN: llc -mcpu=cortex-a9 --enable-no-nans-fp-math < %s | FileCheck -check-prefix=FAST %s +; RUN: llc -mcpu=cortex-a9 < %s | FileCheck %s target triple = "armv7-apple-ios" -; SAFE: test -; FAST: test +; CHECK-LABEL: test define float @test(float %x, float %y) { entry: -; SAFE: vmul.f32 -; SAFE: vsub.f32 -; FAST: mov r0, #0 +; CHECK: vmul.f32 +; CHECK-NEXT: vsub.f32 %0 = fmul float %x, %y %1 = fsub float %0, %0 ret float %1 } - +; CHECK-LABEL: test_nnan +define float @test_nnan(float %x, float %y) { +entry: +; CHECK: mov r0, #0 + %0 = fmul float %x, %y + %1 = fsub nnan float %0, %0 + ret float %1 +} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll index a78fdd5..f1486f97 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll @@ -74,7 +74,7 @@ entry: ; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 ; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 ; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32 +; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 @@ -83,9 +83,9 @@ entry: ; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 ; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 ; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32 - call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false) +; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24 +; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 + call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false) ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7) diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll 
b/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll index 7ba2ed2..f1d28e2 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll @@ -19,11 +19,11 @@ target triple = "dxil-pc-shadermodel6.6-compute" ; PRINT:; Resource Bindings: ; PRINT-NEXT:; -; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count -; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------ -; PRINT-NEXT:; CB1 cbuffer NA NA CB0 cb0 1 -; PRINT-NEXT:; CB2 cbuffer NA NA CB1 cb1 1 -; PRINT-NEXT:; MyConstants cbuffer NA NA CB2 cb5,space15 1 +; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count +; PRINT-NEXT:; ---- +; PRINT-NEXT:; CB1 cbuffer NA NA CB0 cb0 1 +; PRINT-NEXT:; CB2 cbuffer NA NA CB1 cb1 1 +; PRINT-NEXT:; MyConstants cbuffer NA NA CB2 cb5,space15 1 define void @test() #0 { diff --git a/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll b/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll new file mode 100644 index 0000000..ff03bf1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll @@ -0,0 +1,16 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +define i32 @test_getdimensions_no_mips() { + ; CHECK: %[[HANDLE:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, + ; CHECK-NEXT: %[[ANNOT_HANDLE:.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[HANDLE]] + %handle = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + + ; CHECK-NEXT: %[[RETVAL:.*]] = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %[[ANNOT_HANDLE]], i32 undef) + ; CHECK-NEXT: %[[DIM:.*]] = extractvalue %dx.types.Dimensions %[[RETVAL]], 0 + %1 = call i32 @llvm.dx.resource.getdimensions.x(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %handle) + + ; CHECK-NEXT: ret i32 %[[DIM]] + ret i32 %1 +} diff --git a/llvm/test/CodeGen/Hexagon/swp-many-stores.mir b/llvm/test/CodeGen/Hexagon/swp-many-stores.mir new file mode 100644 index 0000000..bf14dcf --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-many-stores.mir @@ -0,0 +1,88 @@ +# RUN: llc -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null -pipeliner-max-num-stores=5 2>&1 | FileCheck %s +# REQUIRES: asserts + +# This loop has six stores, which exceeds the limit set by +# `pipeliner-max-num-stores`. 
+ +# CHECK: Too many stores + +--- | + target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + target triple = "hexagon-unknown-linux-musl" + + define void @f(ptr %a, i32 %n) #0 { + entry: + %guard = icmp sgt i32 %n, 0 + %btc = sub nsw i32 %n, 1 + br i1 %guard, label %loop.preheader, label %exit + + loop.preheader: ; preds = %entry + %0 = add i32 %n, 1 + %cgep = getelementptr i8, ptr %a, i32 %0 + br label %loop + + loop: ; preds = %loop.preheader, %loop + %lsr.iv = phi ptr [ %cgep, %loop.preheader ], [ %cgep8, %loop ] + %i = phi i32 [ %i.dec, %loop ], [ %btc, %loop.preheader ] + %cgep7 = getelementptr i8, ptr %lsr.iv, i32 -2 + store i8 0, ptr %cgep7, align 1 + %cgep8 = getelementptr i8, ptr %lsr.iv, i32 -1 + store i8 1, ptr %cgep8, align 1 + store i8 2, ptr %lsr.iv, align 1 + %cgep9 = getelementptr i8, ptr %lsr.iv, i32 1 + store i8 3, ptr %cgep9, align 1 + %cgep10 = getelementptr i8, ptr %lsr.iv, i32 2 + store i8 4, ptr %cgep10, align 1 + %cgep11 = getelementptr i8, ptr %lsr.iv, i32 3 + store i8 5, ptr %cgep11, align 1 + %i.dec = sub i32 %i, 1 + %ec = icmp eq i32 %i.dec, 0 + br i1 %ec, label %exit, label %loop + + exit: ; preds = %loop, %entry + ret void + } + + attributes #0 = { "target-cpu"="hexagonv79" } +... +--- +name: f +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x50000000), %bb.3(0x30000000) + liveins: $r0, $r1 + + %7:intregs = COPY $r1 + %6:intregs = COPY $r0 + %8:predregs = C2_cmpgti %7, 0 + J2_jumpf %8, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.loop.preheader: + successors: %bb.2(0x80000000) + + %0:intregs = A2_addi %7, -1 + %1:intregs = S4_addaddi %7, %6, 1 + %10:intregs = A2_tfrsi 0 + %11:intregs = A2_tfrsi 1 + %14:intregs = COPY %0 + J2_loop0r %bb.2, %14, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2.loop (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %2:intregs = PHI %1, %bb.1, %4, %bb.2 + S2_storerb_io %2, -2, %10 :: (store (s8) into %ir.cgep7) + %4:intregs = A2_addi %2, -1 + S2_storerb_io %2, -1, %11 :: (store (s8) into %ir.cgep8) + S4_storeirb_io %2, 0, 2 :: (store (s8) into %ir.lsr.iv) + S4_storeirb_io %2, 1, 3 :: (store (s8) into %ir.cgep9) + S4_storeirb_io %2, 2, 4 :: (store (s8) into %ir.cgep10) + S4_storeirb_io %2, 3, 5 :: (store (s8) into %ir.cgep11) + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3.exit: + PS_jmpret $r31, implicit-def dead $pc +... 
diff --git a/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll b/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll new file mode 100644 index 0000000..e67d031 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s + +; The addition of vector `A` with vector of 1s currently uses `vspltisw` to generate vector of 1s followed by add operation. + +; Function for the vector type v2i64 `a + {1, 1}` +define <2 x i64> @test_v2i64(<2 x i64> %a) { +; CHECK-LABEL: test_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vspltisw v3, 1 +; CHECK-NEXT: vupklsw v3, v3 +; CHECK-NEXT: vaddudm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %add = add <2 x i64> %a, splat (i64 1) + ret <2 x i64> %add +} + +; Function for the vector type v4i32 `a + {1, 1, 1, 1}` +define <4 x i32> @test_v4i32(<4 x i32> %a) { +; CHECK-LABEL: test_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vspltisw v3, 1 +; CHECK-NEXT: vadduwm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %add = add <4 x i32> %a, splat (i32 1) + ret <4 x i32> %add +} + +; Function for the vector type v8i16 `a + {1, 1, 1, 1, 1, 1, 1, 1}` +define <8 x i16> @test_v8i16(<8 x i16> %a) { +; CHECK-LABEL: test_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vspltish v3, 1 +; CHECK-NEXT: vadduhm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %add = add <8 x i16> %a, splat (i16 1) + ret <8 x i16> %add +} + +; Function for the vector type v16i8 `a + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}` +define <16 x i8> @test_16i8(<16 x i8> %a) { +; CHECK-LABEL: test_16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v3, 1 +; CHECK-NEXT: vaddubm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %add = add <16 x i8> %a, splat (i8 1) + ret <16 x i8> %add +} diff --git a/llvm/test/CodeGen/PowerPC/vector-all-ones.ll b/llvm/test/CodeGen/PowerPC/vector-all-ones.ll deleted file mode 100644 index e4c93adc..0000000 --- a/llvm/test/CodeGen/PowerPC/vector-all-ones.ll +++ /dev/null @@ -1,23 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s - -; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \ -; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s - -; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ -; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s - -; Currently the generated code uses `vspltisw` to generate vector of 1s followed by add operation. -; This pattern is expected to be optimized in a future patch by using `xxleqv` to generate vector of -1s -; followed by subtraction operation. 
-define dso_local noundef <4 x i32> @test1(<4 x i32> %a) { -; CHECK-LABEL: test1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vspltisw v3, 1 -; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: blr -entry: - %add = add <4 x i32> %a, splat (i32 1) - ret <4 x i32> %add -} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 2e500d5..da7546e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -689,8 +689,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INSERT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_EXTRACT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir new file mode 100644 index 0000000..d7c0e80 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir @@ -0,0 +1,1742 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: insertelement_nxv1i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 1 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv1i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 1 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv1i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv1i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s32) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s32) + %1:_(s32) = COPY $x11 + %4:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF + %3:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32) + $v0 = COPY %3(<vscale x 1 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s32) = G_CONSTANT i32 1 + %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv2i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s32) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s32) + %1:_(s32) = COPY $x11 + %4:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %3:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32) + $v0 = COPY %3(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s32) = G_CONSTANT i32 2 + %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(s32) = COPY $x10 + %0:_(s1) = G_TRUNC %1(s32) + %3:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %3, %0(s1), %4(s32) + $v0 = COPY %2(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv8i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s32) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s32) + %1:_(s32) = COPY $x11 + %4:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %3:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32) + $v0 = COPY %3(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s32) = G_CONSTANT i32 15 + %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32) + $v0 = COPY %0(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv16i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s32) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s32) + %1:_(s32) = COPY $x11 + %4:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %3:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32) + $v0 = COPY %3(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_3 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i1_3 + ; CHECK: liveins: $v0, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s32) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(<vscale x 4 x s1>) = COPY $v0 + %2:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %0, %1(s1), %4(s32) + $v0 = COPY %3(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv1i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s32) + %3:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32) + $v8 = COPY %2(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s32) + %3:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32) + $v8 = COPY %2(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv4i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s32) + %3:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32) + $v8 = COPY %2(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv8i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s32) + %3:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32) + $v8 = COPY %2(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv16i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv16i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv16i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: insertelement_nxv16i8_2 + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[COPY1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %2:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $x11 + %4:_(s32) = COPY $x12 + %1:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %7:_(s32) = G_TRUNC %1(s64) + %5:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %6, %0(s8), %7(s32) + $v8m2 = COPY %5(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i8_3 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i8_3 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %2:_(s32) = COPY $x10 + %1:_(s8) = G_TRUNC %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %0, %1(s8), %4(s32) + $v8 = COPY %3(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %3:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32) + $v8 = COPY %2(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s32) = G_CONSTANT i32 1 + %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv2i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %3:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32) + $v8 = COPY %2(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %3:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32) + $v8 = COPY %2(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv8i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv8i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv8i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %3:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32) + $v8m2 = COPY %2(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv16i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: insertelement_nxv16i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv16i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv16i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %3:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %4:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32) + $v8m4 = COPY %2(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %2:_(s32) = COPY $x10 + %1:_(s16) = G_TRUNC %2(s32) + %4:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %0, %1(s16), %4(s32) + $v8 = COPY %3(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(s32) = COPY $x10 + %2:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + $v8 = COPY %1(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv2i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(s32) = COPY $x10 + %2:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + $v8 = COPY %1(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(s32) = COPY $x10 + %2:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + $v8m2 = COPY %1(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv8i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv8i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv8i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(s32) = COPY $x10 + %2:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + $v8m4 = COPY %1(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv16i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: insertelement_nxv16i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv16i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv16i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(s32) = COPY $x10 + %2:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + $v8m8 = COPY %1(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: insertelement_nxv4i32 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s32>) = COPY $v8m2 + %1:_(s32) = COPY $x10 + %3:_(s32) = G_CONSTANT i32 0 + %2:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %3(s32) + $v8m2 = COPY %2(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv1i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv1i64_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = COPY $x10 + %2:_(s32) = COPY $x11 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %5:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32) + $v8 = COPY %3(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv2i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv2i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv2i64_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s32) = COPY $x10 + %2:_(s32) = COPY $x11 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %5:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32) + $v8m2 = COPY %3(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
+--- +name: insertelement_nxv4i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv4i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv4i64_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s32) = COPY $x10 + %2:_(s32) = COPY $x11 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %5:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32) + $v8m4 = COPY %3(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv8i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: insertelement_nxv8i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s32) = G_CONSTANT i32 0 + %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv8i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv8i64_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(s32) = COPY $x10 + %2:_(s32) = COPY $x11 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %5:_(s32) = G_CONSTANT i32 0 + %3:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32) + $v8m8 = COPY %3(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv4i64 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $v8m4 + + ; CHECK-LABEL: name: insertelement_nxv4i64 + ; CHECK: liveins: $x10, $x11, $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s64>) = COPY $v8m4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[MV]](s64), [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(<vscale x 4 x s64>) = COPY $v8m4 + %2:_(s32) = COPY $x10 + %3:_(s32) = COPY $x11 + %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 0 + %4:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %0, %1(s64), %5(s32) + $v8m4 = COPY %4(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... 
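(Editorial note, not part of the patch.) The riscv32 cases above end with the i64-element variants, where the CHECK lines show the 64-bit element value carried as a G_MERGE_VALUES of two s32 registers before it reaches G_INSERT_VECTOR_ELT, since a 64-bit scalar is split across two 32-bit registers on riscv32; the new riscv64 file that follows repeats the same matrix of cases with s64 as the native index type. To relate these autogenerated MIR cases back to IR, a minimal sketch of the kind of insertelement they model might look like the function below (illustrative only; the function name and signature are not taken from this patch):

; Illustrative IR sketch only -- not part of this diff. IRTranslator lowers a
; scalable-vector insertelement like this to the G_INSERT_VECTOR_ELT operation
; these MIR tests feed to the legalizer; on riscv32 the i64 element value is
; carried as two s32 registers, which is the G_MERGE_VALUES seen in the checks.
define <vscale x 2 x i64> @insert_poison_nxv2i64(i64 %x) {
  %ins = insertelement <vscale x 2 x i64> poison, i64 %x, i32 0
  ret <vscale x 2 x i64> %ins
}

As the NOTE headers state, the CHECK lines themselves are maintained by utils/update_mir_test_checks.py, so after a legalizer change they are typically refreshed by rerunning that script against a built llc rather than edited by hand.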
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir
new file mode 100644
index 0000000..4c33ddc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir
@@ -0,0 +1,1731 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: insertelement_nxv1i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name: insertelement_nxv1i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name: insertelement_nxv1i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv1i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s64)
+    %3:_(s64) = COPY $x11
+    %1:_(s32) = G_TRUNC %3(s64)
+    %5:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %6:_(s64) = G_ZEXT %1(s32)
+    %4:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64)
+    $v0 = COPY %4(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+--- +name: insertelement_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s64) = G_CONSTANT i64 1 + %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv2i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s64) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s32) = G_TRUNC %3(s64) + %5:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF + %6:_(s64) = G_ZEXT %1(s32) + %4:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64) + $v0 = COPY %4(<vscale x 2 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s64) = G_CONSTANT i64 2 + %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(s64) = COPY $x10 + %0:_(s1) = G_TRUNC %1(s64) + %3:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %3, %0(s1), %4(s64) + $v0 = COPY %2(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv8i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s64) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s32) = G_TRUNC %3(s64) + %5:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF + %6:_(s64) = G_ZEXT %1(s32) + %4:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64) + $v0 = COPY %4(<vscale x 8 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i1_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 false + %3:_(s64) = G_CONSTANT i64 15 + %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... 
+--- +name: insertelement_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i1_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %2:_(s1) = G_CONSTANT i1 true + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64) + $v0 = COPY %0(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv16i1_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %2:_(s64) = COPY $x10 + %0:_(s1) = G_TRUNC %2(s64) + %3:_(s64) = COPY $x11 + %1:_(s32) = G_TRUNC %3(s64) + %5:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF + %6:_(s64) = G_ZEXT %1(s32) + %4:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64) + $v0 = COPY %4(<vscale x 16 x s1>) + PseudoRET implicit $v0 +... +--- +name: insertelement_nxv4i1_3 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i1_3 + ; CHECK: liveins: $v0, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s64) + ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>) + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(<vscale x 4 x s1>) = COPY $v0 + %2:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %2(s64) + %4:_(s64) = G_CONSTANT i64 0 + %3:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %0, %1(s1), %4(s64) + $v0 = COPY %3(<vscale x 4 x s1>) + PseudoRET implicit $v0 +... 
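(Editorial note, not part of the patch.) In the *_2 mask-vector cases above, the insert index arrives as an i32 carried in a 64-bit register: the pre-legalized MIR truncates the s64 copy to s32 and zero-extends it back, and the legalizer lowers that G_ZEXT to the G_AND with 4294967295 seen in the CHECK lines. A hypothetical IR function with a 32-bit index (again only a sketch under that assumption, not code from this patch) would exercise the same path:

; Illustrative IR sketch only -- not part of this diff. A 32-bit insert index
; on riscv64 is zero-extended to 64 bits, which the legalizer expresses as an
; AND with 0xFFFFFFFF before the G_INSERT_VECTOR_ELT.
define <vscale x 2 x i1> @insert_mask_elt(i1 %b, i32 %idx) {
  %ins = insertelement <vscale x 2 x i1> poison, i1 %b, i32 %idx
  ret <vscale x 2 x i1> %ins
}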
+--- +name: insertelement_nxv1i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s64) + %3:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64) + $v8 = COPY %2(<vscale x 1 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv2i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s64) + %3:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64) + $v8 = COPY %2(<vscale x 2 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv4i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s64) + %3:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64) + $v8 = COPY %2(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8 = COPY %0(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i8_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s64) + %3:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64) + $v8 = COPY %2(<vscale x 8 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv16i8_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i8_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv16i8_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i8_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %2:_(s8) = G_CONSTANT i8 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64) + $v8m2 = COPY %0(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv16i8_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: insertelement_nxv16i8_2 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[COPY1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %2:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %2(s64) + %1:_(s64) = COPY $x11 + %4:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF + %3:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %4, %0(s8), %1(s64) + $v8m2 = COPY %3(<vscale x 16 x s8>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i8_3 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i8_3 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s8>) = COPY $v8 + %2:_(s64) = COPY $x10 + %1:_(s8) = G_TRUNC %2(s64) + %4:_(s64) = G_CONSTANT i64 0 + %3:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %0, %1(s8), %4(s64) + $v8 = COPY %3(<vscale x 4 x s8>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %3:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64) + $v8 = COPY %2(<vscale x 1 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s64) = G_CONSTANT i64 1 + %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv2i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %3:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64) + $v8 = COPY %2(<vscale x 2 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8 = COPY %0(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv4i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %3:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64) + $v8 = COPY %2(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv8i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv8i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8m2 = COPY %0(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv8i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %3:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64) + $v8m2 = COPY %2(<vscale x 8 x s16>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv16i16_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i16_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv16i16_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i16_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %2:_(s16) = G_CONSTANT i16 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64) + $v8m4 = COPY %0(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... 
+--- +name: insertelement_nxv16i16_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv16i16_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %3:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64) + $v8m4 = COPY %2(<vscale x 16 x s16>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8, $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i16 + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(<vscale x 4 x s16>) = COPY $v8 + %2:_(s64) = COPY $x10 + %1:_(s16) = G_TRUNC %2(s64) + %4:_(s64) = G_CONSTANT i64 0 + %3:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %0, %1(s16), %4(s64) + $v8 = COPY %3(<vscale x 4 x s16>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8 = COPY %0(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %3:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64) + $v8 = COPY %2(<vscale x 1 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8 = COPY %0(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv2i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %3:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64) + $v8 = COPY %2(<vscale x 2 x s32>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv4i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv4i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m2 = COPY %0(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv4i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %3:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64) + $v8m2 = COPY %2(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv8i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv8i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m4 = COPY %0(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... 
+--- +name: insertelement_nxv8i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %3:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64) + $v8m4 = COPY %2(<vscale x 8 x s32>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv16i32_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i32_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv16i32_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv16i32_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %2:_(s32) = G_CONSTANT i32 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64) + $v8m8 = COPY %0(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... 
+--- +name: insertelement_nxv16i32_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv16i32_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %3:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF + %4:_(s64) = G_CONSTANT i64 0 + %2:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64) + $v8m8 = COPY %2(<vscale x 16 x s32>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $v8m2 + + ; CHECK-LABEL: name: insertelement_nxv4i32 + ; CHECK: liveins: $x10, $v8m2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s32), [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(<vscale x 4 x s32>) = COPY $v8m2 + %2:_(s64) = COPY $x10 + %1:_(s32) = G_TRUNC %2(s64) + %4:_(s64) = G_CONSTANT i64 0 + %3:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %4(s64) + $v8m2 = COPY %3(<vscale x 4 x s32>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv1i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv1i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv1i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64) + $v8 = COPY %0(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... 
+--- +name: insertelement_nxv1i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv1i64_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_(s64) = COPY $x10 + %2:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) + $v8 = COPY %1(<vscale x 1 x s64>) + PseudoRET implicit $v8 +... +--- +name: insertelement_nxv2i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv2i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv2i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64) + $v8m2 = COPY %0(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... +--- +name: insertelement_nxv2i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv2i64_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_(s64) = COPY $x10 + %2:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) + $v8m2 = COPY %1(<vscale x 2 x s64>) + PseudoRET implicit $v8m2 +... 
+--- +name: insertelement_nxv4i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv4i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv4i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64) + $v8m4 = COPY %0(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv4i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv4i64_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_(s64) = COPY $x10 + %2:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) + $v8m4 = COPY %1(<vscale x 4 x s64>) + PseudoRET implicit $v8m4 +... +--- +name: insertelement_nxv8i64_0 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i64_0 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... 
+--- +name: insertelement_nxv8i64_1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + ; CHECK-LABEL: name: insertelement_nxv8i64_1 + ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %2:_(s64) = G_CONSTANT i64 -1 + %3:_(s64) = G_CONSTANT i64 0 + %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64) + $v8m8 = COPY %0(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... +--- +name: insertelement_nxv8i64_2 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: insertelement_nxv8i64_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_(s64) = COPY $x10 + %2:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) + $v8m8 = COPY %1(<vscale x 8 x s64>) + PseudoRET implicit $v8m8 +... diff --git a/llvm/test/CodeGen/RISCV/branch-rel.mir b/llvm/test/CodeGen/RISCV/branch-rel.mir new file mode 100644 index 0000000..1ed5f57 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/branch-rel.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc %s -mtriple=riscv64 -run-pass=branch-relaxation -o - -verify-machineinstrs | FileCheck %s + +--- | + define void @foo() { + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoBR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &".space 4096", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: BGE $x1, $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoRET + bb.0: + liveins: $x1 + BNE $x1, $x0, %bb.3 + PseudoBR %bb.3 + bb.1: + liveins: $x1 + INLINEASM &".space 4096", 1 + BGE $x1, $x0, %bb.3 + bb.3: + PseudoRET +## NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll index c2b4494..11e7e5c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll @@ -1,16 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s -; CHECK-LABEL: mul_v16i8 -; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 -; CHECK: vector.body: -; CHECK: %index = phi i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 16 -; CHECK: [[LD0:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef) -; CHECK: tail call void @llvm.masked.store.v16i8.p0(<16 x i8> {{.*}}, ptr {{.*}}, i32 4, <16 x i1> [[VCTP]]) define dso_local arm_aapcs_vfpcc void @mul_v16i8(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v16i8( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 15 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -16 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 16 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <16 x i8> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v16i8.p0(<16 x i8> [[MUL]], ptr [[TMP6]], i32 4, <16 x i1> 
[[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 15 @@ -45,17 +70,41 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: mul_v8i16 -; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 -; CHECK: vector.body: -; CHECK: %index = phi i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 8 -; CHECK: [[LD0:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) -; CHECK: tail call void @llvm.masked.store.v8i16.p0(<8 x i16> {{.*}}, ptr {{.*}}, i32 4, <8 x i1> [[VCTP]]) define dso_local arm_aapcs_vfpcc void @mul_v8i16(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v8i16( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <8 x i16> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v8i16.p0(<8 x i16> [[MUL]], ptr [[TMP6]], i32 4, <8 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP15]] = call i32 
@llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 7 @@ -90,16 +139,41 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: mul_v4i32 -; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 -; CHECK: vector.body: -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 -; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]]) define dso_local arm_aapcs_vfpcc void @mul_v4i32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v4i32( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 
[[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 3 @@ -134,17 +208,47 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: split_vector -; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 -; CHECK: vector.body: -; CHECK: %index = phi i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 -; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]]) define dso_local arm_aapcs_vfpcc void @split_vector(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @split_vector( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[EXTRACT_1_LOW:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> undef, <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[EXTRACT_1_HIGH:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> undef, <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[EXTRACT_2_LOW:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD2]], <4 x i32> undef, <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[EXTRACT_2_HIGH:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD2]], <4 x i32> undef, <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i32> [[EXTRACT_1_LOW]], [[EXTRACT_2_LOW]] +; 
CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> [[EXTRACT_1_HIGH]], [[EXTRACT_2_HIGH]] +; CHECK-NEXT: [[COMBINE:%.*]] = shufflevector <2 x i32> [[MUL]], <2 x i32> [[SUB]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[COMBINE]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 3 @@ -186,14 +290,48 @@ for.cond.cleanup: ; preds = %vector.body, %entry } ; One of the loads now uses ult predicate. -; CHECK-LABEL: mismatch_load_pred -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 -; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> %wrong, <4 x i32> undef) -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]]) define dso_local arm_aapcs_vfpcc void @mismatch_load_pred(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mismatch_load_pred( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], 
<i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 +; CHECK-NEXT: [[WRONG:%.*]] = icmp ult <4 x i32> [[INDUCTION]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[WRONG]], <4 x i32> undef) +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 3 @@ -236,17 +374,48 @@ for.cond.cleanup: ; preds = %vector.body, %entry } ; The store now uses ult predicate. -; CHECK-LABEL: mismatch_store_pred -; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 -; CHECK: vector.body: -; CHECK: %index = phi i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 -; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %wrong) define dso_local arm_aapcs_vfpcc void @mismatch_store_pred(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mismatch_store_pred( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 
[[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 +; CHECK-NEXT: [[WRONG:%.*]] = icmp ult <4 x i32> [[INDUCTION]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[WRONG]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 3 @@ -294,14 +463,72 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; ; Step value 16 doesn't match vector width 4 ; -; CHECK-LABEL: interleave4 -; CHECK: vector.body: -; CHECK: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) -; CHECK: %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v7, i32 %N) -; CHECK: %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v8, i32 %N) -; CHECK: %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v9, i32 %N) -; define dso_local void @interleave4(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local void @interleave4( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: [[V0:%.*]] = add i32 [[N]], 15 +; CHECK-NEXT: [[V1:%.*]] = lshr i32 [[V0]], 4 +; CHECK-NEXT: [[V2:%.*]] = shl nuw i32 [[V1]], 4 +; CHECK-NEXT: [[V3:%.*]] = add i32 [[V2]], -16 +; CHECK-NEXT: [[V4:%.*]] = lshr i32 [[V3]], 4 +; CHECK-NEXT: [[V5:%.*]] = add nuw nsw i32 [[V4]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: 
[[VECTOR_PH]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, ptr [[A]], i32 8 +; CHECK-NEXT: [[SCEVGEP30:%.*]] = getelementptr i32, ptr [[C]], i32 8 +; CHECK-NEXT: [[SCEVGEP37:%.*]] = getelementptr i32, ptr [[B]], i32 8 +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[V5]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV38:%.*]] = phi ptr [ [[SCEVGEP39:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP37]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV31:%.*]] = phi ptr [ [[SCEVGEP32:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP30]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP25:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[V14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[V6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[V15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[V7:%.*]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK15:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V7]], i32 [[N]]) +; CHECK-NEXT: [[V8:%.*]] = add i32 [[V7]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK16:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V8]], i32 [[N]]) +; CHECK-NEXT: [[V9:%.*]] = add i32 [[V8]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK17:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V9]], i32 [[N]]) +; CHECK-NEXT: [[SCEVGEP42:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 -2 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[SCEVGEP42]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[SCEVGEP43:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP43]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD19:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[LSR_IV38]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]], <4 x i32> undef) +; CHECK-NEXT: [[SCEVGEP41:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD20:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP41]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]], <4 x i32> undef) +; CHECK-NEXT: [[SCEVGEP34:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 -2 +; CHECK-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[SCEVGEP34]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[SCEVGEP35:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 -1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP35]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD23:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[LSR_IV31]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]], <4 x i32> undef) +; CHECK-NEXT: [[SCEVGEP36:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP36]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]], <4 x i32> undef) +; CHECK-NEXT: [[V10:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD21]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[V11:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD22]], 
[[WIDE_MASKED_LOAD18]] +; CHECK-NEXT: [[V12:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD23]], [[WIDE_MASKED_LOAD19]] +; CHECK-NEXT: [[V13:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD24]], [[WIDE_MASKED_LOAD20]] +; CHECK-NEXT: [[SCEVGEP27:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 -2 +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V10]], ptr [[SCEVGEP27]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 -1 +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V11]], ptr [[SCEVGEP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]]) +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V12]], ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]]) +; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 1 +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V13]], ptr [[SCEVGEP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]]) +; CHECK-NEXT: [[SCEVGEP25]] = getelementptr i32, ptr [[LSR_IV]], i32 16 +; CHECK-NEXT: [[SCEVGEP32]] = getelementptr i32, ptr [[LSR_IV31]], i32 16 +; CHECK-NEXT: [[SCEVGEP39]] = getelementptr i32, ptr [[LSR_IV38]], i32 16 +; CHECK-NEXT: [[V14]] = add i32 [[V9]], 4 +; CHECK-NEXT: [[V15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[V6]], i32 1) +; CHECK-NEXT: [[V16:%.*]] = icmp ne i32 [[V15]], 0 +; CHECK-NEXT: br i1 [[V16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp sgt i32 %N, 0 %v0 = add i32 %N, 15 @@ -370,12 +597,42 @@ for.cond.cleanup: ret void } -; CHECK-LABEL: const_expected_in_set_loop -; CHECK: call <4 x i1> @llvm.get.active.lane.mask -; CHECK-NOT: vctp -; CHECK: ret void -; define dso_local void @const_expected_in_set_loop(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local void @const_expected_in_set_loop( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -4 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 42) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call 
<4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr [[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4 +; CHECK-NEXT: [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -413,12 +670,42 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: tripcount_arg_not_invariant -; CHECK: call <4 x i1> @llvm.get.active.lane.mask -; CHECK-NOT: vctp -; CHECK: ret void -; define dso_local void @tripcount_arg_not_invariant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local void @tripcount_arg_not_invariant( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -4 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[INDEX]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr 
[[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4 +; CHECK-NEXT: [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[VECTOR_PH]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -458,12 +745,42 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: addrec_base_not_zero -; CHECK: call <4 x i1> @llvm.get.active.lane.mask -; CHECK-NOT: vctp -; CHECK: ret void -; define dso_local void @addrec_base_not_zero(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local void @addrec_base_not_zero( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -4 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 1, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr [[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4 +; CHECK-NEXT: [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +; 
CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[VECTOR_PH]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll index fa6a66b..9775cf9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll @@ -1,15 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name INST --version 6 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s -; CHECK-LABEL: reduction_i32 -; CHECK: phi i32 [ 0, %vector.ph ] -; CHECK: phi <8 x i16> [ zeroinitializer, %vector.ph ] -; CHECK: phi i32 -; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]]) -; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8 -; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) -; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp5, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) define i16 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, i32 %N) { +; CHECK-LABEL: define i16 @reduction_i32( +; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N_VEC]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = sub i32 [[TMP4]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef) +; CHECK-NEXT: [[INSTTMP5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP5]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef) +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i16> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = add <8 x i16> [[TMP7]], [[WIDE_MASKED_LOAD3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP9]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) +; 
CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[VEC_PHI_LCSSA:%.*]] = phi <8 x i16> [ [[VEC_PHI]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[DOTLCSSA3:%.*]] = phi <8 x i1> [ [[TMP5]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <8 x i16> [ [[TMP8]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[DOTLCSSA3]], <8 x i16> [[DOTLCSSA]], <8 x i16> [[VEC_PHI_LCSSA]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP10]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[TMP10]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <8 x i16> [[RDX_SHUF4]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_SHUF6:%.*]] = shufflevector <8 x i16> [[BIN_RDX5]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <8 x i16> [[RDX_SHUF6]], [[BIN_RDX5]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[BIN_RDX7]], i32 0 +; CHECK-NEXT: ret i16 [[TMP11]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[RES_0:%.*]] = phi i16 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i16 [[RES_0]] +; entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %vector.ph @@ -59,16 +99,52 @@ for.cond.cleanup: ret i16 %res.0 } -; CHECK-LABEL: reduction_i32_with_scalar -; CHECK: vector.body: -; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %{{.*}}, %vector.body ] -; CHECK: %{{.*}} = phi i32 [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] -; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]]) -; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8 -; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) define i16 @reduction_i32_with_scalar(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr { +; CHECK-LABEL: define i16 @reduction_i32_with_scalar( +; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i16 [[B:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i32 [[TMP]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N_VEC]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[INSTTMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = sub i32 [[TMP4]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef) +; CHECK-NEXT: [[INSTTMP5:%.*]] = add <8 x i16> [[VEC_PHI]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[INSTTMP6]] = add <8 x i16> [[INSTTMP5]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP7]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[INSTTMP8:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[INSTTMP6]], <8 x i16> [[VEC_PHI]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[INSTTMP8]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[INSTTMP8]] +; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <8 x i16> [[RDX_SHUF5]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x i16> [[BIN_RDX6]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <8 x i16> [[RDX_SHUF7]], [[BIN_RDX6]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[BIN_RDX8]], i32 0 +; CHECK-NEXT: ret i16 [[TMP9]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[RES_0:%.*]] = phi i16 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i16 [[RES_0]] +; entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %vector.ph @@ -119,15 +195,46 @@ for.cond.cleanup: ; despite this we can still calculate a precise enough range so that the ; the overflow checks for get.active.active.lane.mask don't reject ; tail-predication. 
-; -; CHECK-LABEL: @reduction_not_guarded -; -; CHECK: vector.body: -; CHECK: @llvm.arm.mve.vctp -; CHECK-NOT: @llvm.get.active.lane.mask.v8i1.i32 -; CHECK: ret -; define i16 @reduction_not_guarded(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr { +; CHECK-LABEL: define i16 @reduction_not_guarded( +; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i16 [[B:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i32 [[TMP]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N_VEC]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[INSTTMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = sub i32 [[TMP4]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef) +; CHECK-NEXT: [[INSTTMP5:%.*]] = add <8 x i16> [[VEC_PHI]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[INSTTMP6]] = add <8 x i16> [[INSTTMP5]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP7]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[INSTTMP8:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[INSTTMP6]], <8 x i16> [[VEC_PHI]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[INSTTMP8]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[INSTTMP8]] +; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <8 x i16> [[RDX_SHUF5]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x i16> [[BIN_RDX6]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <8 x i16> [[RDX_SHUF7]], [[BIN_RDX6]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[BIN_RDX8]], i32 0 +; CHECK-NEXT: ret i16 [[TMP9]] +; entry: %tmp = add i32 %N, -1 %n.rnd.up = add nuw nsw i32 %tmp, 8 @@ -166,12 +273,76 @@ middle.block: ; preds = %vector.body ret i16 %tmp9 } -; 
CHECK-LABEL: @Correlation -; CHECK: vector.body: -; CHECK: @llvm.arm.mve.vctp -; CHECK-NOT: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask -; define dso_local void @Correlation(ptr nocapture readonly %Input, ptr nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local void @Correlation( +; CHECK-SAME: ptr readonly captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]], i16 signext [[SIZE:%.*]], i16 signext [[N:%.*]], i16 signext [[SCALE:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[N]] to i32 +; CHECK-NEXT: [[CMP36:%.*]] = icmp sgt i16 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP36]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END17:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[SIZE]] to i32 +; CHECK-NEXT: [[CONV1032:%.*]] = zext i16 [[SCALE]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CONV2]], 3 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[LSR_IV51:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[FOR_END:.*]] ], [ [[TMP0]], %[[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV46:%.*]] = phi ptr [ [[SCEVGEP47:%.*]], %[[FOR_END]] ], [ [[INPUT]], %[[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[I_037:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC16:%.*]], %[[FOR_END]] ] +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[I_037]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[CONV2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i32 [[I_037]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -4 +; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i32 [[TMP8]], 1 +; CHECK-NEXT: [[CMP433:%.*]] = icmp slt i32 [[I_037]], [[CONV2]] +; CHECK-NEXT: br i1 [[CMP433]], label %[[VECTOR_PH:.*]], label %[[FOR_END]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP9]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV48:%.*]] = phi ptr [ [[SCEVGEP49:%.*]], %[[VECTOR_BODY]] ], [ [[LSR_IV46]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[INPUT]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP12:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP13]] = sub i32 [[TMP11]], 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[LSR_IV]], i32 2, <4 x i1> [[TMP12]], <4 x i16> undef) +; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32> +; CHECK-NEXT: [[WIDE_MASKED_LOAD42:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[LSR_IV48]], i32 2, <4 x i1> [[TMP12]], <4 x i16> undef) +; CHECK-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD42]] to <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> undef, i32 [[CONV1032]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x 
i32> [[TMP17]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <4 x i32> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP20]] = add <4 x i32> [[TMP19]], [[VEC_PHI]] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i16, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP49]] = getelementptr i16, ptr [[LSR_IV48]], i32 4 +; CHECK-NEXT: [[TMP21]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP10]], i32 1) +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> [[TMP20]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]]) +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP24]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP25:%.*]] = lshr i32 [[SUM_0_LCSSA]], 16 +; CHECK-NEXT: [[CONV13:%.*]] = trunc i32 [[TMP25]] to i16 +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[OUTPUT]], i32 [[I_037]] +; CHECK-NEXT: store i16 [[CONV13]], ptr [[ARRAYIDX14]], align 2 +; CHECK-NEXT: [[INC16]] = add nuw nsw i32 [[I_037]], 1 +; CHECK-NEXT: [[SCEVGEP47]] = getelementptr i16, ptr [[LSR_IV46]], i32 1 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV51]], -1 +; CHECK-NEXT: [[EXITCOND39:%.*]] = icmp eq i32 [[INC16]], [[CONV]] +; CHECK-NEXT: br i1 [[EXITCOND39]], label %[[FOR_END17]], label %[[FOR_BODY]] +; CHECK: [[FOR_END17]]: +; CHECK-NEXT: ret void +; entry: %conv = sext i16 %N to i32 %cmp36 = icmp sgt i16 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll index a8ad360..b54d526 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -1,8 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s -; CHECK-LABEL: expand_v8i16_v8i32 -; CHECK-NOT: call i32 @llvm.arm.mve.vctp define void @expand_v8i16_v8i32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define void @expand_v8i16_v8i32( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[EXPAND_1:%.*]] = zext <8 x i16> [[WIDE_MASKED_LOAD]] to <8 x i32> +; CHECK-NEXT: [[EXPAND_2:%.*]] = zext <8 x i16> [[WIDE_MASKED_LOAD2]] to <8 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <8 x i32> [[EXPAND_2]], [[EXPAND_1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> [[MUL]], ptr [[TMP6]], i32 4, <8 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 7 @@ -39,15 +74,57 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: expand_v8i16_v4i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS_REM:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]]) -; CHECK: [[ELEMS_REM]] = sub i32 [[ELEMS]], 8 -; CHECK: tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) -; CHECK: %store.pred = icmp ule <4 x i32> %induction.store -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %store.pred) -; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %store.pred) define void @expand_v8i16_v4i32(ptr readonly %a, ptr readonly %b, ptr %c, ptr %d, i32 %N) { +; CHECK-LABEL: define void @expand_v8i16_v4i32( +; CHECK-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 7 +; CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10_STORE:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11_STORE:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10_STORE]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, 
%[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STORE_IDX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[STORE_IDX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 8 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef) +; CHECK-NEXT: [[EXTRACT_2_LOW:%.*]] = shufflevector <8 x i16> [[WIDE_MASKED_LOAD2]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[EXTRACT_2_HIGH:%.*]] = shufflevector <8 x i16> [[WIDE_MASKED_LOAD2]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[EXPAND_1:%.*]] = zext <4 x i16> [[EXTRACT_2_LOW]] to <4 x i32> +; CHECK-NEXT: [[EXPAND_2:%.*]] = zext <4 x i16> [[EXTRACT_2_HIGH]] to <4 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <4 x i32> [[EXPAND_2]], [[EXPAND_1]] +; CHECK-NEXT: [[SUB:%.*]] = mul nsw <4 x i32> [[EXPAND_1]], [[EXPAND_2]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT_STORE:%.*]] = insertelement <4 x i32> undef, i32 [[STORE_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT_STORE:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT_STORE]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION_STORE:%.*]] = add <4 x i32> [[BROADCAST_SPLAT_STORE]], <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[STORE_PRED:%.*]] = icmp ule <4 x i32> [[INDUCTION_STORE]], [[BROADCAST_SPLAT11_STORE]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[STORE_IDX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[STORE_PRED]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[D]], i32 [[STORE_IDX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[SUB]], ptr [[GEP]], i32 4, <4 x i1> [[STORE_PRED]]) +; CHECK-NEXT: [[STORE_IDX_NEXT]] = add i32 [[STORE_IDX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 7 @@ -98,9 +175,43 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: expand_v4i32_v4i64 -; CHECK-NOT: call i32 @llvm.arm.mve.vctp define void @expand_v4i32_v4i64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) { +; CHECK-LABEL: define void @expand_v4i32_v4i64( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], 3 +; 
CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[EXPAND_1:%.*]] = zext <4 x i32> [[WIDE_MASKED_LOAD]] to <4 x i64> +; CHECK-NEXT: [[EXPAND_2:%.*]] = zext <4 x i32> [[WIDE_MASKED_LOAD2]] to <4 x i64> +; CHECK-NEXT: [[MUL:%.*]] = mul nsw <4 x i64> [[EXPAND_2]], [[EXPAND_1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[C]], i32 [[INDEX]] +; CHECK-NEXT: tail call void @llvm.masked.store.v4i64.p0(<4 x i64> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; entry: %cmp8 = icmp eq i32 %N, 0 %tmp8 = add i32 %N, 3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll index ec542df..fb1a4a4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -1,24 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s -; CHECK-LABEL: vec_mul_reduce_add - -; CHECK: vector.ph: -; CHECK: %start = call i32 @llvm.start.loop.iterations.i32 -; CHECK: br label %vector.body - -; CHECK: vector.body: -; CHECK: [[ELTS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[SUB:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELTS]]) -; CHECK: [[SUB]] = sub i32 [[ELTS]], 4 -; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]] -; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], - -; CHECK: middle.block: -; CHECK: [[VPSEL:%[^ ]+]] = select <4 x i1> [[VCTP]], -; CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]]) - define i32 
@vec_mul_reduce_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { +; CHECK-LABEL: define i32 @vec_mul_reduce_add( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -4 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]]) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP7]]) +; CHECK-NEXT: [[TMP9]] = sub i32 [[TMP7]], 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV2]], i32 4, <4 x i1> [[TMP8]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP8]], <4 x i32> undef) +; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP11]] = add nsw <4 x i32> [[TMP10]], [[VEC_PHI]] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i32, ptr [[LSR_IV2]], i32 4 +; CHECK-NEXT: [[TMP12]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1) +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]]) +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RES_0_LCSSA]] +; entry: %cmp8 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll index 04a2268..314e1b4 100644 --- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll +++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll @@ -1,5 +1,6 @@ ; RUN: opt -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s ; RUN: opt -mattr=+simd128 -passes=loop-vectorize -vectorizer-maximize-bandwidth %s | llc -mtriple=wasm32 -mattr=+simd128 -verify-machineinstrs -o - | FileCheck %s --check-prefix=MAX-BANDWIDTH +; 
RUN: opt -mattr=+simd128,+relaxed-simd -passes=loop-vectorize -vectorizer-maximize-bandwidth %s | llc -mtriple=wasm32 -mattr=+simd128,+relaxed-simd -verify-machineinstrs -o - | FileCheck %s --check-prefix=RELAXED-MAX-BANDWIDTH target triple = "wasm32" @@ -23,6 +24,10 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n ; MAX-BANDWIDTH: i32x4.add ; MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.relaxed_dot_i8x16_i7x16_add_s + entry: %cmp7.not = icmp eq i32 %N, 0 br i1 %cmp7.not, label %for.cond.cleanup, label %for.body @@ -47,6 +52,109 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +define hidden i32 @i32_mac_u8_s8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { +; CHECK-LABEL: i32_mac_u8_s8: +; CHECK: loop +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_u +; CHECK: i32x4.extend_low_i16x8_u +; CHECK: v128.load32_zero +; CHECK: i16x8.extend_low_i8x16_s +; CHECK: i32x4.extend_low_i16x8_s +; CHECK: i32x4.mul +; CHECK: i32x4.add + +; MAX-BANDWIDTH: loop +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; MAX-BANDWIDTH: v128.load +; MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.mul +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.mul +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.mul +; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; MAX-BANDWIDTH: i32x4.mul +; MAX-BANDWIDTH: i32x4.add + +; RELAXED-MAX-BANDWIDTH: loop +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; 
RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i16x8.extend_low_i8x16_s +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.add +entry: + %cmp7.not = icmp eq i32 %N, 0 + br i1 %cmp7.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %res.0.lcssa + +for.body: ; preds = %entry, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %res.08 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %a, i32 %i.09 + %0 = load i8, ptr %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8, ptr %b, i32 %i.09 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %mul = mul nsw i32 %conv2, %conv + %add = add nsw i32 %mul, %res.08 + %inc = add nuw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: i32_mac_s16: ; CHECK: i32x4.load16x4_s 0:p2align=1 @@ -57,6 +165,12 @@ define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: v128.load ; MAX-BANDWIDTH: v128.load ; MAX-BANDWIDTH: i32x4.dot_i16x8_s +; MAX-BANDWIDTH: i32x4.add + +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.dot_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.add entry: %cmp7.not = icmp eq i32 %N, 0 @@ -116,6 +230,31 @@ define hidden i64 @i64_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: i64x2.extmul_low_i32x4_s ; MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; 
RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add + entry: %cmp7.not = icmp eq i32 %N, 0 br i1 %cmp7.not, label %for.cond.cleanup, label %for.body @@ -156,6 +295,14 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: i64x2.extend_low_i32x4_s ; MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i64x2.extend_high_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i64x2.extend_low_i32x4_s +; RELAXED-MAX-BANDWIDTH: i64x2.add + entry: %cmp6.not = icmp eq i32 %N, 0 br i1 %cmp6.not, label %for.cond.cleanup, label %for.body @@ -197,6 +344,15 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n ; MAX-BANDWIDTH: i32x4.add ; MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i16x8.extmul_low_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; RELAXED-MAX-BANDWIDTH: i16x8.extmul_high_i8x16_u +; RELAXED-MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i32x4.add + entry: %cmp7.not = icmp eq i32 %N, 0 br i1 %cmp7.not, label %for.cond.cleanup, label %for.body @@ -235,6 +391,13 @@ define hidden i32 @i32_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: i32x4.add ; MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.extmul_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.extmul_high_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i32x4.add + entry: %cmp7.not = icmp eq i32 %N, 0 br i1 %cmp7.not, label %for.cond.cleanup, label %for.body @@ -277,6 +440,17 @@ define hidden i32 @i32_mac_u16_s16(ptr nocapture noundef readonly %a, ptr nocapt ; MAX-BANDWIDTH: i32x4.add ; MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.extend_high_i16x8_s +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.extend_high_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_s +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i32x4.add +; RELAXED-MAX-BANDWIDTH: i32x4.add + entry: %cmp7.not = icmp eq i32 %N, 0 br i1 %cmp7.not, label %for.cond.cleanup, label %for.body @@ -335,6 +509,32 @@ define hidden i64 @i64_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: i64x2.extmul_low_i32x4_u ; MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; 
RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i8x16.shuffle 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i32x4.extend_low_i16x8_u +; RELAXED-MAX-BANDWIDTH: i64x2.extmul_low_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add + + entry: %cmp8.not = icmp eq i32 %N, 0 br i1 %cmp8.not, label %for.cond.cleanup, label %for.body @@ -375,6 +575,14 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture ; MAX-BANDWIDTH: i64x2.extend_low_i32x4_u ; MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: v128.load +; RELAXED-MAX-BANDWIDTH: i32x4.mul +; RELAXED-MAX-BANDWIDTH: i64x2.extend_high_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add +; RELAXED-MAX-BANDWIDTH: i64x2.extend_low_i32x4_u +; RELAXED-MAX-BANDWIDTH: i64x2.add + entry: %cmp6.not = icmp eq i32 %N, 0 br i1 %cmp6.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll new file mode 100644 index 0000000..ce7024d --- /dev/null +++ b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll @@ -0,0 +1,34 @@ +; RUN: llc --code-model=kernel < %s -asm-verbose=0 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: func_no_abs_sym +define i64 @func_no_abs_sym() nounwind { + ; CHECK: movq $no_abs_sym, %rax + %1 = ptrtoint ptr @no_abs_sym to i64 + ret i64 %1 +} + +; CHECK-LABEL: func_abs_sym +define i64 @func_abs_sym() nounwind { + ; CHECK: movabsq $abs_sym, %rax + %1 = ptrtoint ptr @abs_sym to i64 + ret i64 %1 +} + +; CHECK-LABEL: func_abs_sym_in_range +define i64 @func_abs_sym_in_range() nounwind { + ;; The absolute_symbol range fits in 32 bits but we still use movabs + ;; since there's no benefit to using the sign extending instruction + ;; with absolute symbols. 
+ ; CHECK: movabsq $abs_sym_in_range, %rax + %1 = ptrtoint ptr @abs_sym_in_range to i64 + ret i64 %1 +} + +@no_abs_sym = external hidden global [0 x i8] +@abs_sym = external hidden global [0 x i8], !absolute_symbol !0 +@abs_sym_in_range = external hidden global [0 x i8], !absolute_symbol !1 + +!0 = !{i64 -1, i64 -1} ;; Full range +!1 = !{i64 -2147483648, i64 2147483648} ;; In range diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll index af9d944..de9caa5 100644 --- a/llvm/test/CodeGen/X86/apx/cf.ll +++ b/llvm/test/CodeGen/X86/apx/cf.ll @@ -235,9 +235,10 @@ define void @and_cond(i32 %a, i1 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: setg %al +; CHECK-NEXT: notb %sil ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %al, %sil -; CHECK-NEXT: cfcmovel %ecx, 0 +; CHECK-NEXT: cfcmovnel %ecx, 0 ; CHECK-NEXT: retq %is_pos = icmp sgt i32 %a, 0 %not_b = xor i1 %b, true diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll index 2aea9c1..632d90d 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -27,7 +27,7 @@ entry: !1 = !{i64 0, !"_ZTSFivE.generalized"} !2 = !{i64 0, !"_ZTSFviE.generalized"} -; CHECK: .section .callgraph,"o",@progbits,.text +; CHECK: .section .llvm.callgraph,"o",@progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index 1aabf66..ed6849a 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -1,8 +1,8 @@ ;; Test if temporary labels are generated for each indirect callsite. -;; Test if the .callgraph section contains the MD5 hash of callees' type (type id) +;; Test if the .llvm.callgraph section contains the MD5 hash of callees' type (type id) ;; is correctly paired with its corresponding temporary label generated for indirect ;; call sites annotated with !callee_type metadata. -;; Test if the .callgraph section contains unique direct callees. +;; Test if the .llvm.callgraph section contains unique direct callees. ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s @@ -36,7 +36,7 @@ entry: !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} -; CHECK: .section .callgraph,"o",@progbits,.text +; CHECK: .section .llvm.callgraph,"o",@progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll index 34dc5b8..49cc335 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll @@ -1,7 +1,10 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls. 
+ +; REQUIRES: x86-registered-target +; REQUIRES: arm-registered-target ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { entry: @@ -27,7 +30,7 @@ declare !type !2 i32 @bar(i8 signext) !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{i64 0, !"_ZTSFiiE.generalized"} -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00000000 00008e19 0b7f3326 ; CHECK-NEXT: 0x00000010 e3000154 86bc5981 4b8e3000 05000000 ;; Verify that the type id 0x308e4b8159bc8654 is in section. diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll index c144a24..8a1c6ca 100644 --- a/llvm/test/CodeGen/X86/call-graph-section.ll +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -1,7 +1,10 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file. + +; REQUIRES: x86-registered-target +; REQUIRES: arm-registered-target ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s declare !type !0 void @foo() @@ -31,7 +34,7 @@ entry: ;; Make sure following type IDs are in call graph section ;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814 -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000000 ; CHECK-NEXT: 0x00000010 00000324 44f731f5 eecb3e54 86bc5981 ; CHECK-NEXT: 0x00000020 4b8e307a de6814f8 97fd77 diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index 5571519..c90344b8 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -502,11 +502,11 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE2-NEXT: psubw %xmm1, %xmm0 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0] ; SSE2-NEXT: paddw %xmm1, %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,0,65535,65535,0] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,16,0,8,8,0,0,0,0,0,2,0,2,0,0,0] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -517,7 +517,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE41-NEXT: psubw %xmm1, %xmm0 -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0] ; SSE41-NEXT: paddw %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4096,2048,8,u,u,2,2,u] ; SSE41-NEXT: pmulhuw %xmm0, %xmm1 @@ -530,7 +530,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; AVX-NEXT: vpmulhuw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0] ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7] @@ -541,7 +541,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0] ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: retq @@ -630,7 +630,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [171,0,0,0] ; SSE2-NEXT: psrlw $15, %xmm0 ; SSE2-NEXT: pandn %xmm0, %xmm1 ; SSE2-NEXT: por %xmm2, %xmm1 @@ -641,7 +641,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [171,0,0,0] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm2, %xmm2 ; SSE41-NEXT: psrlw $7, %xmm2 @@ -654,7 +654,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; AVX-LABEL: combine_vec_udiv_nonuniform4: ; AVX: # %bb.0: ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [171,0,0,0] ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1 @@ -691,7 +691,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; SSE2-NEXT: psubw %xmm3, %xmm0 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,32768,0,0,0,0,0,32768] ; SSE2-NEXT: paddw %xmm3, %xmm0 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,u,0,4,0,4,16,0,4,0,0,4,0,0,0,16] ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 ; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: pand %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll index f3950b7..b2b0a6d 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -1,17 +1,101 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target 
triple = "x86_64-unknown-linux-gnu" -;; A minimal test case. Subsequent PRs will expand on this test case -;; (e.g., with more functions, variables and profiles) and test the hotness -;; reconcillation implementation. +;; Requires asserts for -debug-only. +; REQUIRES: asserts + +; RUN: rm -rf %t && split-file %s %t && cd %t + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +; RUN: -partition-static-data-sections=true \ +; RUN: -debug-only=static-data-profile-info \ +; RUN: -data-sections=true -unique-section-names=false \ +; RUN: input-with-data-access-prof-on.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR + ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ ; RUN: -partition-static-data-sections=true \ +; RUN: -debug-only=static-data-profile-info \ ; RUN: -data-sections=true -unique-section-names=false \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR +; RUN: input-with-data-access-prof-off.ll -o - 2>&1 | FileCheck %s --check-prefixes=OFF + +; LOG: hot_bss has section prefix hot, the max from data access profiles as hot and PGO counters as hot +; LOG: data_unknown_hotness has section prefix <empty>, the max from data access profiles as <empty> and PGO counters as unlikely +; LOG: external_relro_array has section prefix unlikely, solely from data access profiles + +; IR: .type hot_bss,@object +; IR-NEXT: .section .bss.hot.,"aw" +; IR: .type data_unknown_hotness,@object +; IR-NEXT: .section .data,"aw" +; IR: .type external_relro_array,@object +; IR-NEXT: .section .data.rel.ro.unlikely.,"aw" + + +; OFF: .type hot_bss,@object +; OFF-NEXT: .section .bss.hot.,"aw" +; OFF: .type data_unknown_hotness,@object +; OFF-NEXT: .section .data.unlikely.,"aw" +;; Global variable section prefix metadata is not used when +;; module flag `EnableDataAccessProf` is 0, and @external_relro_array has +;; external linkage, so analysis based on PGO counters doesn't apply. +; OFF: .type external_relro_array,@object # @external_relro_array +; OFF-NEXT: .section .data.rel.ro,"aw" + +;--- input-with-data-access-prof-on.ll +; Internal vars +@hot_bss = internal global i32 0, !section_prefix !17 +@data_unknown_hotness = internal global i32 1 +; External vars +@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18 + +define void @cold_func() !prof !15 { + %9 = load i32, ptr @data_unknown_hotness + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} + +define void @hot_func() !prof !14 { + %9 = load i32, ptr @hot_bss + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} + +declare i32 @func_taking_arbitrary_param(...) 
-; IR: .section .bss.hot.,"aw" +!llvm.module.flags = !{!0, !1} +!0 = !{i32 2, !"EnableDataAccessProf", i32 1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} +!18 = !{!"section_prefix", !"unlikely"} + +;--- input-with-data-access-prof-off.ll +; Same as file above except that module flag `EnableDataAccessProf` has value 0. +; Internal vars @hot_bss = internal global i32 0, !section_prefix !17 +@data_unknown_hotness = internal global i32 1 +; External vars +@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18 + +define void @cold_func() !prof !15 { + %9 = load i32, ptr @data_unknown_hotness + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} define void @hot_func() !prof !14 { %9 = load i32, ptr @hot_bss @@ -21,8 +105,9 @@ define void @hot_func() !prof !14 { declare i32 @func_taking_arbitrary_param(...) -!llvm.module.flags = !{!1} +!llvm.module.flags = !{!0, !1} +!0 = !{i32 2, !"EnableDataAccessProf", i32 0} !1 = !{i32 1, !"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} !3 = !{!"ProfileFormat", !"InstrProf"} @@ -40,3 +125,4 @@ declare i32 @func_taking_arbitrary_param(...) !15 = !{!"function_entry_count", i64 1} !16 = !{!"branch_weights", i32 1, i32 99999} !17 = !{!"section_prefix", !"hot"} +!18 = !{!"section_prefix", !"unlikely"} diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index bdb7c30..4ec54d8 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -2071,7 +2071,7 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) { ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,7,42,32] ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll index cc4bda8..650b562 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1 diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll index 7c1a1e2..874d885 100644 --- 
a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1 diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index 6174011..83a0ddb 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -5,9 +5,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,XOP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX512VLBW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512VL,VLVBMI @@ -598,6 +598,33 @@ define <8 x i16> @var_shuffle_zero_v8i16(<8 x i16> %v, <8 x i16> %indices) nounw ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; +; AVX512F-LABEL: var_shuffle_zero_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2 +; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [514,514,514,514,514,514,514,514] +; AVX512F-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7] +; AVX512BW-NEXT: vpcmpnleuw %zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [514,514,514,514,514,514,514,514] +; AVX512BW-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: 
vmovdqu16 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; ; AVX512VL-LABEL: var_shuffle_zero_v8i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 @@ -921,6 +948,28 @@ define <16 x i8> @var_shuffle_zero_v16i8(<16 x i8> %v, <16 x i8> %indices) nounw ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; +; AVX512F-LABEL: var_shuffle_zero_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2 +; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpcmpnleub %zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; ; AVX512VL-LABEL: var_shuffle_zero_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 9b52857..d16b28a 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1872,7 +1872,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm1, %xmm2 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; SSE2-NEXT: por %xmm1, %xmm2 ; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] @@ -1964,7 +1964,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 -; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; X86-SSE2-NEXT: por %xmm1, %xmm2 ; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll index 983ae59..3d85d55 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll @@ -851,7 +851,7 @@ define i16 @test_v4i16_v4i8(<4 x i16> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE2-NEXT: paddw %xmm0, %xmm1 diff 
--git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index d565ef0..1602cde 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -1673,7 +1673,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2] @@ -1750,7 +1750,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2] diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index 8cb2c7b..a847da6 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -1223,7 +1223,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1275,7 +1275,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: retl %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll index 57874c4..eb39b6a 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll @@ -1480,7 +1480,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1532,7 +1532,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: retl %shift = lshr <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3> diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s index 73653d0..6345b2f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s s_mov_b64 s[2:3], 0x10abcdef12345678 // GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678 ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] @@ -62,10 +62,8 @@ s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 s_mov_b64 s[2:3], 0xffffffff01234567 // GFX1250: s_mov_b64 s[2:3], 0xffffffff01234567 ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff] -// TODO: disasm s_mov_b64 s[2:3], lit64(0x777) -// GFX1250-ASM: s_mov_b64 s[2:3], lit64(0x777) ; encoding: [0xfe,0x01,0x82,0xbe,0x77,0x07,0x00,0x00,0x00,0x00,0x00,0x00] -// GFX1250-DIS: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00] +// GFX1250: s_mov_b64 s[2:3], lit64(0x777) ; encoding: [0xfe,0x01,0x82,0xbe,0x77,0x07,0x00,0x00,0x00,0x00,0x00,0x00] s_mov_b64 s[2:3], 0x777 // GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 0d61c1f..39de9a2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s v_mov_b64_e32 v[4:5], v[2:3] // GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e] @@ -26,8 +26,10 @@ v_mov_b64 v[4:5], -1 v_mov_b64 v[4:5], 0.5 // GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] +// TODO: Encode as a 32-bit literal unless lit64() is specified. 
v_mov_b64 v[254:255], 0xaf123456 -// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] v_tanh_f32 v5, v1 // GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s index 02872b0..d9f6934 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s @@ -196,8 +196,9 @@ v_add_nc_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_nc_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] -// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250-ASM: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU v_add_nc_u64 v[4:5], 0x3f717273, v[4:5] // GFX1250: v_add_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x50,0x73,0x72,0x71,0x3f] @@ -316,8 +317,9 @@ v_sub_nc_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_nc_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] -// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250-ASM: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU v_sub_nc_u64 v[4:5], 0x3f717273, v[4:5] // GFX1250: v_sub_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x52,0x73,0x72,0x71,0x3f] @@ -436,8 +438,9 @@ v_mul_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_mul_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] -// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250-ASM: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU v_mul_u64 v[4:5], 0x3f717273, v[4:5] // GFX1250: v_mul_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x54,0x73,0x72,0x71,0x3f] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s index ad5771b..0548e9d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s +++ 
b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s s_alloc_vgpr 0x1235 // GFX12: s_alloc_vgpr 0x1235 ; encoding: [0xff,0x53,0x80,0xbe,0x35,0x12,0x00,0x00] @@ -860,7 +860,8 @@ s_mov_b64 s[0:1], 0x3f717273 s_mov_b64 s[0:1], 0xaf123456 // GFX1200: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mov_b64 s[0:1], null // GFX12: s_mov_b64 s[0:1], null ; encoding: [0x7c,0x01,0x80,0xbe] @@ -969,7 +970,8 @@ s_cmov_b64 s[0:1], 0x3f717273 s_cmov_b64 s[0:1], 0xaf123456 // GFX1200: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cmov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_not_b32 s0, s1 // GFX12: s_not_b32 s0, s1 ; encoding: [0x01,0x1e,0x80,0xbe] @@ -1072,7 +1074,8 @@ s_not_b64 s[0:1], 0x3f717273 s_not_b64 s[0:1], 0xaf123456 // GFX1200: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_not_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_wqm_b32 s0, s1 // GFX12: s_wqm_b32 s0, s1 ; encoding: [0x01,0x1c,0x80,0xbe] @@ -1175,7 +1178,8 @@ s_wqm_b64 s[0:1], 0x3f717273 s_wqm_b64 s[0:1], 0xaf123456 // GFX1200: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_wqm_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_brev_b32 s0, s1 // GFX12: s_brev_b32 s0, s1 ; encoding: [0x01,0x04,0x80,0xbe] @@ -1278,7 +1282,8 @@ s_brev_b64 s[0:1], 0x3f717273 s_brev_b64 s[0:1], 
0xaf123456 // GFX1200: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_brev_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bcnt0_i32_b32 s0, s1 // GFX12: s_bcnt0_i32_b32 s0, s1 ; encoding: [0x01,0x16,0x80,0xbe] @@ -1390,7 +1395,8 @@ s_bcnt0_i32_b64 s0, 0x3f717273 s_bcnt0_i32_b64 s0, 0xaf123456 // GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_bcnt0_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bcnt1_i32_b32 s0, s1 // GFX12: s_bcnt1_i32_b32 s0, s1 ; encoding: [0x01,0x18,0x80,0xbe] @@ -1502,7 +1508,8 @@ s_bcnt1_i32_b64 s0, 0x3f717273 s_bcnt1_i32_b64 s0, 0xaf123456 // GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_bcnt1_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_ff1_i32_b32 s0, s1 // GFX12: s_ctz_i32_b32 s0, s1 ; encoding: [0x01,0x08,0x80,0xbe] @@ -1614,7 +1621,8 @@ s_ff1_i32_b64 s0, 0x3f717273 s_ff1_i32_b64 s0, 0xaf123456 // GFX1200: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_ctz_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_flbit_i32_b32 s0, s1 // GFX12: s_clz_i32_u32 s0, s1 ; encoding: [0x01,0x0a,0x80,0xbe] @@ -1726,7 +1734,8 @@ s_flbit_i32_b64 s0, 0x3f717273 s_flbit_i32_b64 s0, 0xaf123456 // GFX1200: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_clz_i32_u64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_flbit_i32 s0, s1 // GFX12: s_cls_i32 s0, s1 ; encoding: [0x01,0x0c,0x80,0xbe] @@ -1838,7 +1847,8 @@ s_flbit_i32_i64 s0, 0x3f717273 s_flbit_i32_i64 s0, 0xaf123456 // GFX1200: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cls_i32_i64 s0, lit64(0xaf123456) 
; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sext_i32_i8 s0, s1 // GFX12: s_sext_i32_i8 s0, s1 ; encoding: [0x01,0x0e,0x80,0xbe] @@ -2284,7 +2294,8 @@ s_and_saveexec_b64 s[0:1], 0x3f717273 s_and_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x23,0x80,0xbe] @@ -2324,7 +2335,8 @@ s_or_saveexec_b64 s[0:1], 0x3f717273 s_or_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_xor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x25,0x80,0xbe] @@ -2364,7 +2376,8 @@ s_xor_saveexec_b64 s[0:1], 0x3f717273 s_xor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_saveexec_b64 s[0:1], s[2:3] // GFX12: s_and_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x31,0x80,0xbe] @@ -2404,7 +2417,8 @@ s_andn2_saveexec_b64 s[0:1], 0x3f717273 s_andn2_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x33,0x80,0xbe] @@ -2444,7 +2458,8 @@ s_orn2_saveexec_b64 s[0:1], 0x3f717273 s_orn2_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: 
[0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_saveexec_b64 s[0:1], s[2:3] // GFX12: s_nand_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x27,0x80,0xbe] @@ -2484,7 +2499,8 @@ s_nand_saveexec_b64 s[0:1], 0x3f717273 s_nand_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_nor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x29,0x80,0xbe] @@ -2524,7 +2540,8 @@ s_nor_saveexec_b64 s[0:1], 0x3f717273 s_nor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_xnor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2b,0x80,0xbe] @@ -2564,7 +2581,8 @@ s_xnor_saveexec_b64 s[0:1], 0x3f717273 s_xnor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_quadmask_b32 s0, s1 // GFX12: s_quadmask_b32 s0, s1 ; encoding: [0x01,0x1a,0x80,0xbe] @@ -2667,7 +2685,8 @@ s_quadmask_b64 s[0:1], 0x3f717273 s_quadmask_b64 s[0:1], 0xaf123456 // GFX1200: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_movrels_b32 s0, s1 // GFX12: s_movrels_b32 s0, s1 ; encoding: [0x01,0x40,0x80,0xbe] @@ -2812,7 +2831,8 @@ s_movreld_b64 s[0:1], 0x3f717273 s_movreld_b64 s[0:1], 0xaf123456 // GFX1200: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_abs_i32 s0, s1 // GFX12: s_abs_i32 s0, s1 ; encoding: 
[0x01,0x15,0x80,0xbe] @@ -2912,7 +2932,8 @@ s_andn1_saveexec_b64 s[0:1], 0x3f717273 s_andn1_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn1_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_not0_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2f,0x80,0xbe] @@ -2952,7 +2973,8 @@ s_orn1_saveexec_b64 s[0:1], 0x3f717273 s_orn1_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn1_wrexec_b64 s[0:1], s[2:3] // GFX12: s_and_not0_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x35,0x80,0xbe] @@ -2992,7 +3014,8 @@ s_andn1_wrexec_b64 s[0:1], 0x3f717273 s_andn1_wrexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_wrexec_b64 s[0:1], s[2:3] // GFX12: s_and_not1_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x37,0x80,0xbe] @@ -3032,7 +3055,8 @@ s_andn2_wrexec_b64 s[0:1], 0x3f717273 s_andn2_wrexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bitreplicate_b64_b32 s[0:1], s2 // GFX12: s_bitreplicate_b64_b32 s[0:1], s2 ; encoding: [0x02,0x14,0x80,0xbe] @@ -3831,7 +3855,8 @@ s_ctz_i32_b64 exec_hi, src_scc s_ctz_i32_b64 null, 0xaf123456 // GFX1200: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xff,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_ctz_i32_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_saveexec_b64 s[10:11], s[2:3] // GFX12: s_and_not1_saveexec_b64 s[10:11], 
s[2:3] ; encoding: [0x02,0x31,0x8a,0xbe] @@ -3859,7 +3884,8 @@ s_and_not1_saveexec_b64 ttmp[14:15], src_scc s_and_not1_saveexec_b64 null, 0xaf123456 // GFX1200: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not0_saveexec_b32 s5, s1 // GFX12: s_and_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2c,0x85,0xbe] @@ -3920,7 +3946,8 @@ s_and_not0_saveexec_b64 ttmp[14:15], src_scc s_and_not0_saveexec_b64 null, 0xaf123456 // GFX1200: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not0_wrexec_b32 s5, s1 // GFX12: s_and_not0_wrexec_b32 s5, s1 ; encoding: [0x01,0x34,0x85,0xbe] @@ -3981,7 +4008,8 @@ s_and_not0_wrexec_b64 ttmp[14:15], src_scc s_and_not0_wrexec_b64 null, 0xaf123456 // GFX1200: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not0_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_saveexec_b32 s5, s1 // GFX12: s_and_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x30,0x85,0xbe] @@ -4075,7 +4103,8 @@ s_and_not1_wrexec_b64 ttmp[14:15], src_scc s_and_not1_wrexec_b64 null, 0xaf123456 // GFX1200: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cls_i32 s5, s1 // GFX12: s_cls_i32 s5, s1 ; encoding: [0x01,0x0c,0x85,0xbe] @@ -4145,7 +4174,8 @@ s_cls_i32_i64 exec_hi, src_scc s_cls_i32_i64 null, 0xaf123456 // GFX1200: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xff,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cls_i32_i64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_clz_i32_u32 s5, s1 // GFX12: s_clz_i32_u32 s5, s1 ; encoding: [0x01,0x0a,0x85,0xbe] @@ -4215,7 +4245,8 @@ s_clz_i32_u64 exec_hi, src_scc 
s_clz_i32_u64 null, 0xaf123456 // GFX1200: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xff,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_clz_i32_u64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not0_saveexec_b32 s5, s1 // GFX12: s_or_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2e,0x85,0xbe] @@ -4276,7 +4307,8 @@ s_or_not0_saveexec_b64 ttmp[14:15], src_scc s_or_not0_saveexec_b64 null, 0xaf123456 // GFX1200: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_saveexec_b32 s5, s1 // GFX12: s_or_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x32,0x85,0xbe] @@ -4337,4 +4369,5 @@ s_or_not1_saveexec_b64 ttmp[14:15], src_scc s_or_not1_saveexec_b64 null, 0xaf123456 // GFX1200: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s index 9c83879..3a24442 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX12,GFX1250-DIS %s s_add_nc_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xa9] @@ -56,7 +56,8 @@ s_add_nc_u64 s[0:1], 0x3f717273, s[2:3] s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf] -// GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// 
GFX1250-DIS: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_add_nc_u64 s[0:1], s[2:3], exec // GFX12: s_add_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xa9] @@ -81,7 +82,8 @@ s_add_nc_u64 s[0:1], s[2:3], 0x3f717273 s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf] -// GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sub_nc_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x00,0xaa] @@ -136,7 +138,8 @@ s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3] s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sub_nc_u64 s[0:1], s[2:3], exec // GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x00,0xaa] @@ -161,7 +164,8 @@ s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273 s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mul_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xaa] @@ -216,7 +220,8 @@ s_mul_u64 s[0:1], 0x3f717273, s[2:3] s_mul_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mul_u64 s[0:1], s[2:3], exec // GFX12: s_mul_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xaa] @@ -241,7 +246,8 @@ s_mul_u64 s[0:1], s[2:3], 0x3f717273 s_mul_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_mul_u64 
s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_add_f32 s5, s1, s2 // GFX12: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0] @@ -2359,7 +2365,8 @@ s_cselect_b64 s[0:1], 0x3f717273, s[4:5] s_cselect_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf] -// GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cselect_b64 s[0:1], s[2:3], exec // GFX12: s_cselect_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x98] @@ -2384,7 +2391,8 @@ s_cselect_b64 s[0:1], s[2:3], 0x3f717273 s_cselect_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf] -// GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_b32 s0, s1, s2 // GFX12: s_and_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8b] @@ -2553,7 +2561,8 @@ s_and_b64 s[0:1], 0x3f717273, s[4:5] s_and_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_b64 s[0:1], s[2:3], exec // GFX12: s_and_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8b] @@ -2578,7 +2587,8 @@ s_and_b64 s[0:1], s[2:3], 0x3f717273 s_and_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_b32 s0, s1, s2 // GFX12: s_or_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8c] @@ -2738,7 +2748,8 @@ s_or_b64 s[0:1], 0x3f717273, s[4:5] s_or_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: 
[0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_b64 s[0:1], s[2:3], exec // GFX12: s_or_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8c] @@ -2763,7 +2774,8 @@ s_or_b64 s[0:1], s[2:3], 0x3f717273 s_or_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_b32 s0, s1, s2 // GFX12: s_xor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8d] @@ -2923,7 +2935,8 @@ s_xor_b64 s[0:1], 0x3f717273, s[4:5] s_xor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_b64 s[0:1], s[2:3], exec // GFX12: s_xor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8d] @@ -2948,7 +2961,8 @@ s_xor_b64 s[0:1], s[2:3], 0x3f717273 s_xor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_b32 s0, s1, s2 // GFX12: s_and_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x91] @@ -3108,7 +3122,8 @@ s_andn2_b64 s[0:1], 0x3f717273, s[4:5] s_andn2_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_b64 s[0:1], s[2:3], exec // GFX12: s_and_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x91] @@ -3133,7 +3148,8 @@ s_andn2_b64 s[0:1], s[2:3], 0x3f717273 s_andn2_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_b32 s0, s1, 
s2 // GFX12: s_or_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x92] @@ -3293,7 +3309,8 @@ s_orn2_b64 s[0:1], 0x3f717273, s[4:5] s_orn2_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_b64 s[0:1], s[2:3], exec // GFX12: s_or_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x92] @@ -3318,7 +3335,8 @@ s_orn2_b64 s[0:1], s[2:3], 0x3f717273 s_orn2_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_b32 s0, s1, s2 // GFX12: s_nand_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8e] @@ -3478,7 +3496,8 @@ s_nand_b64 s[0:1], 0x3f717273, s[4:5] s_nand_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_b64 s[0:1], s[2:3], exec // GFX12: s_nand_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8e] @@ -3503,7 +3522,8 @@ s_nand_b64 s[0:1], s[2:3], 0x3f717273 s_nand_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_b32 s0, s1, s2 // GFX12: s_nor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8f] @@ -3663,7 +3683,8 @@ s_nor_b64 s[0:1], 0x3f717273, s[4:5] s_nor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_b64 s[0:1], s[2:3], exec // GFX12: s_nor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8f] @@ 
-3688,7 +3709,8 @@ s_nor_b64 s[0:1], s[2:3], 0x3f717273 s_nor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_b32 s0, s1, s2 // GFX12: s_xnor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x90] @@ -3848,7 +3870,8 @@ s_xnor_b64 s[0:1], 0x3f717273, s[4:5] s_xnor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_b64 s[0:1], s[2:3], exec // GFX12: s_xnor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x90] @@ -3873,7 +3896,8 @@ s_xnor_b64 s[0:1], s[2:3], 0x3f717273 s_xnor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshl_b32 s0, s1, s2 // GFX12: s_lshl_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x84] @@ -4033,7 +4057,8 @@ s_lshl_b64 s[0:1], 0x3f717273, s4 s_lshl_b64 s[0:1], 0xaf123456, s4 // GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf] -// GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshl_b64 s[0:1], s[2:3], exec_lo // GFX12: s_lshl_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x84] @@ -4217,7 +4242,8 @@ s_lshr_b64 s[0:1], 0x3f717273, s4 s_lshr_b64 s[0:1], 0xaf123456, s4 // GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf] -// GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshr_b64 s[0:1], s[2:3], exec_lo // GFX12: s_lshr_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x85] @@ -4401,7 +4427,8 @@ s_ashr_i64 s[0:1], 0x3f717273, s4 s_ashr_i64 s[0:1], 0xaf123456, s4 // GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4 ; 
encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf] -// GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_ashr_i64 s[0:1], s[2:3], exec_lo // GFX12: s_ashr_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x86] @@ -4996,7 +5023,8 @@ s_bfe_u64 s[0:1], 0x3f717273, s4 s_bfe_u64 s[0:1], 0xaf123456, s4 // GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf] -// GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bfe_u64 s[0:1], s[2:3], exec_lo // GFX12: s_bfe_u64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x00,0x94] @@ -5075,7 +5103,8 @@ s_bfe_i64 s[0:1], 0x3f717273, s4 s_bfe_i64 s[0:1], 0xaf123456, s4 // GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf] -// GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bfe_i64 s[0:1], s[2:3], exec_lo // GFX12: s_bfe_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x94] @@ -6279,7 +6308,8 @@ s_and_not1_b64 s[10:11], vcc, ttmp[14:15] s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 // GFX1200: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_b64 s[10:11], exec, src_scc // GFX12: s_and_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x91] @@ -6298,7 +6328,8 @@ s_and_not1_b64 exec, src_scc, exec s_and_not1_b64 null, 0xaf123456, vcc // GFX1200: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_and_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_b64 s[10:11], s[2:3], s[4:5] // GFX12: s_or_not1_b64 s[10:11], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x8a,0x92] @@ -6311,7 +6342,8 @@ s_or_not1_b64 s[10:11], vcc, ttmp[14:15] s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 // GFX1200: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: 
[0x7a,0xff,0x8a,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_b64 s[10:11], exec, src_scc // GFX12: s_or_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x92] @@ -6330,4 +6362,5 @@ s_or_not1_b64 exec, src_scc, exec s_or_not1_b64 null, 0xaf123456, vcc // GFX1200: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_or_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s index 98bb3c3..8056cef 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1250 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX12,GFX1250-DIS %s s_cmp_lt_f32 s1, s2 // GFX12: s_cmp_lt_f32 s1, s2 ; encoding: [0x01,0x02,0x41,0xbf] @@ -2120,7 +2120,8 @@ s_cmp_eq_u64 s[0:1], 0x3f717273 s_cmp_eq_u64 s[0:1], 0xaf123456 // GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cmp_eq_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cmp_lg_u64 s[0:1], s[2:3] // GFX12: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x11,0xbf] @@ -2163,4 +2164,5 @@ s_cmp_lg_u64 s[0:1], 0x3f717273 s_cmp_lg_u64 s[0:1], 0xaf123456 // GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-ASM: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_cmp_lg_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/lit.local.cfg 
b/llvm/test/MC/AMDGPU/lit.local.cfg index c5853ad..12a5c8a 100644 --- a/llvm/test/MC/AMDGPU/lit.local.cfg +++ b/llvm/test/MC/AMDGPU/lit.local.cfg @@ -1,4 +1,4 @@ -config.substitutions.append(("%extract-encodings", "sed 's/.*encoding://p'")) +config.substitutions.append(("%extract-encodings", "sed -n 's/.*encoding://p'")) if not "AMDGPU" in config.root.targets: config.unsupported = True diff --git a/llvm/test/MC/AMDGPU/offset-expr.s b/llvm/test/MC/AMDGPU/offset-expr.s index 92a9bf1b..7c3c71c 100644 --- a/llvm/test/MC/AMDGPU/offset-expr.s +++ b/llvm/test/MC/AMDGPU/offset-expr.s @@ -9,10 +9,10 @@ BB1: v_nop_e64 BB2: s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295 -// CHECK: s_add_u32 vcc_lo, vcc_lo, 8 // 000000000018: 806AFF6A 00000008 +// CHECK: s_add_u32 vcc_lo, vcc_lo, lit(0x8) // 000000000018: 806AFF6A 00000008 s_addc_u32 vcc_hi, vcc_hi, (BB2-BB1)>>32 -// CHECK: s_addc_u32 vcc_hi, vcc_hi, 0 // 000000000020: 826BFF6B 00000000 +// CHECK: s_addc_u32 vcc_hi, vcc_hi, lit(0x0) // 000000000020: 826BFF6B 00000000 s_add_u32 vcc_lo, vcc_lo, (BB0-BB1)&4294967295 -// CHECK: s_add_u32 vcc_lo, vcc_lo, -16 // 000000000028: 806AFF6A FFFFFFF0 +// CHECK: s_add_u32 vcc_lo, vcc_lo, lit(0xfffffff0) // 000000000028: 806AFF6A FFFFFFF0 s_addc_u32 vcc_hi, vcc_hi, (BB0-BB1)>>32 -// CHECK: s_addc_u32 vcc_hi, vcc_hi, -1 // 000000000030: 826BFF6B FFFFFFFF +// CHECK: s_addc_u32 vcc_hi, vcc_hi, lit(0xffffffff) // 000000000030: 826BFF6B FFFFFFFF diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt index d2da087..856d7c2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt @@ -40,8 +40,7 @@ # VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x00] 0xff 0x06 0x02 0x3e 0x41 0x00 0x00 0x01 -# FIXME: This should be able to round trip with literal after instruction -# VI: v_add_f16_e32 v1, 0, v3 ; encoding: [0x80,0x06,0x02,0x3e] +# VI: v_add_f16_e32 v1, lit(0x0), v3 ; encoding: [0xff,0x06,0x02,0x3e,0x00,0x00,0x00,0x00] 0xff 0x06 0x02 0x3e 0x00 0x00 0x00 0x00 # VI: v_add_f16_e32 v1, 0xffcd, v3 ; encoding: [0xff,0x06,0x02,0x3e,0xcd,0xff,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/literals.txt b/llvm/test/MC/Disassembler/AMDGPU/literals.txt new file mode 100644 index 0000000..bd013a1 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/literals.txt @@ -0,0 +1,30 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xff,0x94,0xfe,0x7e,0x01,0x00,0x00,0x00 +# GFX1250: v_tanh_bf16_e32 v127.l, lit(0x1) ; encoding: [0xff,0x94,0xfe,0x7e,0x01,0x00,0x00,0x00] + +0xff,0xd3,0x23,0xcc,0xff,0xd6,0x00,0x68,0x01,0x00,0x00,0x00 +# GFX1250: v_pk_add_bf16 v255, lit(0x1), vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x23,0xcc,0xff,0xd6,0x00,0x68,0x01,0x00,0x00,0x00] + +0xff,0x3e,0xfe,0x7e,0x01,0x00,0x00,0x00 +# GFX1250: v_tanh_f16_e32 v127.l, lit(0x1) ; encoding: [0xff,0x3e,0xfe,0x7e,0x01,0x00,0x00,0x00] + +0xff,0xfe,0xff,0x79,0x01,0x00,0x00,0x00 +# GFX1250: v_pk_fmac_f16 v255, lit(0x1), v255 ; encoding: [0xff,0xfe,0xff,0x79,0x01,0x00,0x00,0x00] + +# The immediate is always literal in this instruction. 
+0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +0xff,0xec,0x02,0x7e,0x01,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_bf8 v1, lit(0x1) ; encoding: [0xff,0xec,0x02,0x7e,0x01,0x00,0x00,0x00] + +0x0a,0x40,0x2d,0xcc,0xff,0x04,0x0e,0x1c,0x01,0x00,0x00,0x00 +# GFX1250: v_pk_add_min_i16 v10, lit(0x1), v2, v3 ; encoding: [0x0a,0x40,0x2d,0xcc,0xff,0x04,0x0e,0x1c,0x01,0x00,0x00,0x00] + +0xff,0x3c,0xfe,0x7f,0x01,0x00,0x00,0x00 +# GFX1250: v_tanh_f32_e32 v255, lit(0x1) ; encoding: [0xff,0x3c,0xfe,0x7f,0x01,0x00,0x00,0x00] + +0xff,0x3a,0xfc,0x7f,0x01,0x00,0x00,0x00 +# GFX1250: v_mov_b64_e32 v[254:255], lit(0x1) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/X86/verify-callgraph-section.s b/llvm/test/MC/X86/verify-callgraph-section.s index ce07228..9be5a68 100644 --- a/llvm/test/MC/X86/verify-callgraph-section.s +++ b/llvm/test/MC/X86/verify-callgraph-section.s @@ -2,7 +2,7 @@ /// (annotated by generated temporary labels .Ltmp*) are associated /// with the corresponding callee type identifiers. -// RUN: llvm-mc -triple=x86_64 -filetype=obj -o - < %s | llvm-readelf -x .callgraph - | FileCheck %s +// RUN: llvm-mc -triple=x86_64 -filetype=obj -o - < %s | llvm-readelf -x .llvm.callgraph - | FileCheck %s .text .globl ball # -- Begin function ball @@ -38,7 +38,7 @@ ball: # @ball addq $32, %rsp popq %rbx retq - .section .callgraph,"o",@progbits,.text + .section .llvm.callgraph,"o",@progbits,.text .quad 0 .quad .Lfunc_begin0 .quad 1 diff --git a/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll b/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll new file mode 100644 index 0000000..0887f5e --- /dev/null +++ b/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; REQUIRES: llvm_inliner_model_autogenerated && asserts +; RUN: opt -passes='default<O3>' -enable-ml-inliner=release -ml-inliner-skip-policy=if-caller-not-cold -S %s -o - | FileCheck %s +; RUN: opt -passes='default<O3>' -ml-inliner-stop-immediately -enable-ml-inliner=release -ml-inliner-skip-policy=if-caller-not-cold -S %s -o - | FileCheck %s + +declare ptr @f() + +define void @e() #0 { +; CHECK-LABEL: define void @e( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: tail call void @d() +; CHECK-NEXT: tail call void @g() +; CHECK-NEXT: tail call void @d() +; CHECK-NEXT: tail call void @g() +; CHECK-NEXT: tail call void @d() +; CHECK-NEXT: tail call void @g() +; CHECK-NEXT: ret void +; + call void @h() + call void @h() + call void @h() + ret void +} + +define void @d() { +; CHECK-LABEL: define void @d() local_unnamed_addr { +; CHECK-NEXT: tail call void @f() +; CHECK-NEXT: ret void +; + call void @f() + ret void +} + +define void @g() { +; CHECK-LABEL: define void @g() local_unnamed_addr { +; CHECK-NEXT: tail call void @f() +; CHECK-NEXT: ret void +; + call void @f() + ret void +} + +define void @h() #0 { +; CHECK-LABEL: define void @h( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: tail call void @d() +; CHECK-NEXT: tail call void @g() +; CHECK-NEXT: ret void +; + call void @d() + call void @g() + ret void +} + +attributes #0 = { "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" } diff --git a/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll 
b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll index 7cc4446..ad45d1e 100644 --- a/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll +++ b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll @@ -11,10 +11,8 @@ define i16 @test5(i16 %A) !dbg !34 { call void @llvm.dbg.value(metadata i32 %C, metadata !37, metadata !DIExpression()), !dbg !41 ; Preserve the dbg.value for the DCE'd 32-bit 'and'. - ; - ; The high 16 bits of the original 'and' require sign-extending the new 16-bit and: ; CHECK-NEXT: #dbg_value(i16 [[and]], [[C:![0-9]+]], - ; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value) + ; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value) %D = trunc i32 %C to i16, !dbg !42 call void @llvm.dbg.value(metadata i16 %D, metadata !38, metadata !DIExpression()), !dbg !42 diff --git a/llvm/test/Transforms/InstCombine/ptr-int-cast.ll b/llvm/test/Transforms/InstCombine/ptr-int-cast.ll index 69b8f69..82ecbd4 100644 --- a/llvm/test/Transforms/InstCombine/ptr-int-cast.ll +++ b/llvm/test/Transforms/InstCombine/ptr-int-cast.ll @@ -86,3 +86,14 @@ define <4 x ptr> @test7(<4 x i128> %arg) nounwind { %p1 = inttoptr <4 x i128> %arg to <4 x ptr> ret <4 x ptr> %p1 } + +define i64 @ptrtoint_gep_sub(ptr %ptr, i64 %end.addr) { +; CHECK-LABEL: @ptrtoint_gep_sub( +; CHECK-NEXT: ret i64 [[END_ADDR:%.*]] +; + %ptr.addr = ptrtoint ptr %ptr to i64 + %size = sub i64 %end.addr, %ptr.addr + %end = getelementptr i8, ptr %ptr, i64 %size + %end.addr2 = ptrtoint ptr %end to i64 + ret i64 %end.addr2 +} diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 279d4e8..83623fd 100644 --- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK-LABEL: @foo( ;CHECK: icmp eq <4 x i32> ;CHECK: select <4 x i1> -;CHECK: ret i32 -define i32 @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { +;CHECK: ret void +define void @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { entry: %cmp10 = icmp sgt i32 %x, 0 br i1 %cmp10, label %for.body, label %for.end @@ -35,5 +35,5 @@ if.end: ; preds = %for.body, %if.then br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %if.end, %entry - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index ab9b48f..aff2c4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -153,17 +153,20 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[INDEX]], 1 ; 
CHECK-NEXT: [[TMP9:%.*]] = lshr i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i8> ; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i8> [[TMP6]], i32 1 ; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP18]], align 1 -; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP19]], align 1 +; CHECK-NEXT: store i8 [[TMP12]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll index 596e42e..d0c1194 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll @@ -36,7 +36,7 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %exit, label %for.body } -define i32 @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { +define void @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { ; CHECK-COST-2: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.032 @@ -70,7 +70,7 @@ for.cond.cleanup.loopexit: ; preds = %if.end br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 undef + ret void for.body: ; preds = %for.body.preheader, %if.end %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll index 9e20586..44fb8cb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll @@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: @read_mod_write_single_ptr( ; CHECK: load <8 x float> -; CHECK: ret i32 -define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -23,15 +23,15 @@ define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } ; CHECK-LABEL: @read_mod_i64( ; SLOWMEM32: load <2 x i64> ; FASTMEM32: load <4 x i64> -; CHECK: ret i32 -define i32 @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -47,6 +47,6 @@ define i32 @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 6d2cda4..0287645 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { +define void @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { ; CHECK-LABEL: @conversion_cost1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 3 ; CHECK-NEXT: br i1 [[TMP1]], label [[ITER_CHECK:%.*]], label [[DOT_CRIT_EDGE:%.*]] @@ -37,7 +37,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -58,7 +58,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i8> [[VEC_IND8]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -73,11 +73,11 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; %1 = icmp sgt i32 %n, 3 br i1 %1, label %.lr.ph, label %._crit_edge @@ -93,10 +93,10 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } -define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { +define void @conversion_cost2(i32 %n, ptr nocapture %A, 
ptr nocapture %B) nounwind uwtable ssp { ; CHECK-LABEL: @conversion_cost2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 9 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] @@ -136,7 +136,7 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -152,11 +152,11 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; %1 = icmp sgt i32 %n, 9 br i1 %1, label %.lr.ph, label %._crit_edge @@ -173,5 +173,5 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll index af5c921..fa3b4a66 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll @@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux" ;CHECK-LABEL: func1x6( ;CHECK: <4 x i32> ;CHECK: ret -define i32 @func1x6(ptr nocapture %out, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { +define void @func1x6(ptr nocapture %out, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { entry: br label %for.body @@ -40,14 +40,14 @@ for.body: ; preds = %for.body, %entry br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - ret i32 undef + ret void } ; We are vectorizing with 12 runtime checks. 
;CHECK-LABEL: func2x6( ;CHECK: <4 x i32> ;CHECK: ret -define i32 @func2x6(ptr nocapture %out, ptr nocapture %out2, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { +define void @func2x6(ptr nocapture %out, ptr nocapture %out2, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { entry: br label %for.body @@ -85,5 +85,5 @@ for.body: ; preds = %for.body, %entry br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll index 8971dfe..47355e7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK-NOUNRL: store <4 x i32> ;CHECK-NOUNRL-NOT: store <4 x i32> ;CHECK-NOUNRL: ret -define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { +define void @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -27,5 +27,5 @@ define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll index f64255f..b7aa958 100644 --- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; When scalarizing stores we need to preserve the original order. ; Make sure that we are extracting in the correct order (0101, and not 0011). -define i32 @foo(ptr nocapture %A) { +define void @foo(ptr nocapture %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] @@ -39,7 +39,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: br label %for.body @@ -55,7 +55,7 @@ for.body: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 1588d02..51255b2 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { +define void @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -73,7 +73,7 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp26 = icmp sgt i32 %n, 0 @@ -106,11 +106,11 @@ if.end14: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } ; As above but with multiple variables set per block. 
-define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { +define void @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-LABEL: @multi_variable_if_nest( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -188,7 +188,7 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp26 = icmp sgt i32 %n, 0 @@ -224,5 +224,5 @@ if.end14: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll index 8a7f4a3..a88a9b14 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll @@ -17,8 +17,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; } ;} -define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp { -; CHECK-LABEL: define i32 @function0( +define void @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp { +; CHECK-LABEL: define void @function0( ; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]], i32 [[START:%.*]], i32 [[END:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP16:%.*]] = icmp slt i32 [[START]], [[END]] @@ -94,7 +94,7 @@ define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) ; CHECK: [[FOR_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp16 = icmp slt i32 %start, %end @@ -127,7 +127,7 @@ if.end: br i1 %cmp, label %for.body, label %for.end for.end: - ret i32 undef + ret void } @@ -237,6 +237,8 @@ for.end: ; preds = %for.inc, %entry ; Handle PHI with single incoming value having a full mask. ; PR34523 +; NOTE: Changing PHI inputs from undef to poison leads to change in +; behaviour of the test. Left as undef for now. define void @PR34523() { ; CHECK-LABEL: define void @PR34523() { ; CHECK-NEXT: [[BB1:.*:]] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 742ee64..eea2237 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -337,7 +337,7 @@ for.end: ; preds = %for.body ; } ; } -define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { +define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { ; CHECK-LABEL: @multiple_uniform_stores( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0 @@ -429,7 +429,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK: for.end10.loopexit: ; CHECK-NEXT: br label [[FOR_END10]] ; CHECK: for.end10: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp20 = icmp eq i32 %itr, 0 @@ -469,12 +469,12 @@ for.inc8: ; preds = %for.body3, %for.con br i1 %exitcond26, label %for.end10, label %for.cond1.preheader for.end10: ; preds = %for.inc8, %entry - ret i32 undef + ret void } ; second uniform store to the same address is conditional. ; we do not vectorize this. 
-define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { +define void @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { ; CHECK-LABEL: @multiple_uniform_stores_conditional( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0 @@ -520,7 +520,7 @@ define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocaptu ; CHECK: for.end10.loopexit: ; CHECK-NEXT: br label [[FOR_END10]] ; CHECK: for.end10: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp20 = icmp eq i32 %itr, 0 @@ -567,7 +567,7 @@ for.inc8: ; preds = %for.body3, %for.con br i1 %exitcond26, label %for.end10, label %for.cond1.preheader for.end10: ; preds = %for.inc8, %entry - ret i32 undef + ret void } ; cannot vectorize loop with unsafe dependency between uniform load (%i10) and store diff --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll index b891b43..d9d9eec 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep.ll @@ -132,7 +132,7 @@ for.end: ; CHECK-LABEL: @f6 ; CHECK-NOT: <2 x i32> -define i32 @f6(ptr %a, i32 %tmp) { +define void @f6(ptr %a, i32 %tmp) { entry: br label %for.body @@ -149,7 +149,7 @@ for.body: br i1 %exitcond, label %for.body, label %for.end for.end: - ret i32 undef + ret void } ; Don't vectorize true loop carried dependencies that are not a multiple of the diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 1533906..53dad3a 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -74,8 +74,7 @@ exit: ret void } -; FIXME: Currently this mis-compiled when interleaving; all stores store the -; last lane of the last part, instead of the last lane per part. +; Check each unrolled store stores the last lane of the corresponding part. ; Test case for https://github.com/llvm/llvm-project/issues/162498. 
define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(ptr %dst) { ; VF4IC1-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( @@ -121,13 +120,15 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3 -; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP7]], 1 ; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]] ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]] -; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 +; VF2IC2-NEXT: store i32 [[TMP8]], ptr [[TMP4]], align 4 ; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll new file mode 100644 index 0000000..131e41a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll @@ -0,0 +1,227 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define void @call_loop_invariant_operand_bundle(ptr %dst, {float, float} %sv) { +; CHECK-LABEL: define void @call_loop_invariant_operand_bundle( +; CHECK-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[SV]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.pow.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[BROADCAST_SPLAT2]]) [ "deopt"(float 1.000000e+01) ] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ 
%iv.next, %loop ] + %a = extractvalue { float, float } %sv, 0 + %b = extractvalue { float, float } %sv, 1 + %addr = getelementptr float, ptr %dst, i32 %iv + %p = call float @llvm.pow.f32(float %a, float %b) [ "deopt"(float 10.0) ] + store float %p, ptr %addr + %iv.next = add nsw i32 %iv, 1 + %cond = icmp ne i32 %iv.next, 1000 + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @call_unknown_operand_bundle(ptr %dst, {float, float} %sv) { +; CHECK-LABEL: define void @call_unknown_operand_bundle( +; CHECK-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[SV]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.pow.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[BROADCAST_SPLAT2]]) [ "unknown"(ptr null) ] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %a = extractvalue { float, float } %sv, 0 + %b = extractvalue { float, float } %sv, 1 + %addr = getelementptr float, ptr %dst, i32 %iv + %p = call float @llvm.pow.f32(float %a, float %b) [ "unknown"(ptr null) ] + store float %p, ptr %addr + %iv.next = add nsw i32 %iv, 1 + %cond = icmp ne i32 %iv.next, 1000 + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @call_cold_operand_bundle(ptr %dst, {float, float} %sv) { +; CHECK-LABEL: define void @call_cold_operand_bundle( +; CHECK-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { float, float } [[SV]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; 
CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.pow.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[BROADCAST_SPLAT2]]) [ "cold"() ] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %a = extractvalue { float, float } %sv, 0 + %b = extractvalue { float, float } %sv, 1 + %addr = getelementptr float, ptr %dst, i32 %iv + %p = call float @llvm.pow.f32(float %a, float %b) [ "cold"() ] + store float %p, ptr %addr + %iv.next = add nsw i32 %iv, 1 + %cond = icmp ne i32 %iv.next, 1000 + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @assume_loop_variant_operand_bundle(ptr noalias %a, ptr noalias %b) { +; CHECK-LABEL: define void @assume_loop_variant_operand_bundle( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ] +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ] +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ] +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 %iv) ] + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %iv + store float %add, ptr %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, 1599 + br i1 %exitcond, label %exit, label %loop + 
+exit: + ret void +} + +define void @assume_cold_operand_bundle(ptr noalias %a, ptr noalias %b) { +; CHECK-LABEL: define void @assume_cold_operand_bundle( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "cold"() ] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 true) [ "cold"() ] + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %iv + store float %add, ptr %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, 1599 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll index d700d48..f5e480c 100644 --- a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll @@ -10,7 +10,7 @@ ; CHECK: store i64 %indvars.outer, ptr %O2, align 4 -define i64 @foo(ptr nocapture %A, ptr nocapture %B, i64 %n, i64 %m, ptr %O1, ptr %O2) { +define void @foo(ptr nocapture %A, ptr nocapture %B, i64 %n, i64 %m, ptr %O1, ptr %O2) { entry: %cmp = icmp sgt i64 %n, 0 br i1 %cmp, label %for.body.outer.preheader, label %for.end.outer @@ -50,5 +50,5 @@ for.end.outer.loopexit: ; preds = %for.end.inner br label %for.end.outer for.end.outer: ; preds = %for.end.outer.loopexit, %entry - ret i64 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/pr28541.ll b/llvm/test/Transforms/LoopVectorize/pr28541.ll index ad7f6e7..0a9c8c1 100644 --- a/llvm/test/Transforms/LoopVectorize/pr28541.ll +++ b/llvm/test/Transforms/LoopVectorize/pr28541.ll @@ -28,7 +28,7 @@ ; CHECK-NOT: vectorized loop ; CHECK-LABEL: fn1 -define i32 @fn1() { +define void @fn1() { entry: %tmp2 = load i32, ptr @b, align 4 %dec3 = add nsw i32 %tmp2, -1 @@ -67,5 +67,5 @@ while.cond.while.end_crit_edge: ; preds = %while.cond br label %while.end while.end: ; preds = %while.cond.while.end_crit_edge, %entry - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index f87be5a..6ea227f 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -10,7 +10,7 @@ target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; a[i] = b[i] * 3; ; } -define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp { +define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B:%.*]] to i64, !dbg [[DBG4:![0-9]+]] @@ -58,7 +58,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG14:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef, !dbg [[DBG14]] +; CHECK-NEXT: ret void, !dbg [[DBG14]] ; ; FORCED_OPTSIZE-LABEL: @foo( ; FORCED_OPTSIZE-NEXT: entry: @@ -80,7 +80,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; FORCED_OPTSIZE: for.end.loopexit: ; FORCED_OPTSIZE-NEXT: br label [[FOR_END]], !dbg [[DBG10:![0-9]+]] ; FORCED_OPTSIZE: for.end: -; FORCED_OPTSIZE-NEXT: ret i32 undef, !dbg [[DBG10]] +; FORCED_OPTSIZE-NEXT: ret void, !dbg [[DBG10]] ; entry: %cmp6 = icmp sgt i32 %n, 0, !dbg !6 @@ -99,7 +99,7 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond, label %for.end, label %for.body, !dbg !7 for.end: ; preds = %for.body, %entry - ret i32 undef, !dbg !8 + ret void, !dbg !8 } ; Make sure that we try to vectorize loops with a runtime check if the @@ -505,11 +505,11 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]] -; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]] +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META39]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -524,7 +524,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: store i32 0, ptr [[IN]], align 4 ; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]] -; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/write-only.ll b/llvm/test/Transforms/LoopVectorize/write-only.ll index cc21b94..8df71e83 100644 --- a/llvm/test/Transforms/LoopVectorize/write-only.ll +++ b/llvm/test/Transforms/LoopVectorize/write-only.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK-LABEL: @read_mod_write_single_ptr( ;CHECK: load <4 x float> -;CHECK: ret i32 -define i32 
@read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +;CHECK: ret void +define void @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -21,14 +21,14 @@ define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } ; Ensure that volatile stores are not vectorized. ; CHECK-LABEL: @read_mod_write_single_ptr_volatile_store( ; CHECK-NOT: store <4 x float> -; CHECK: ret i32 -define i32 @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -44,5 +44,5 @@ define i32 @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) n br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll index f1ffcc7..239397b 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll @@ -17,7 +17,7 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { ; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ -; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF16:![0-9]+]] { +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {{.*}}{ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 ; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF17:![0-9]+]] diff --git a/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s b/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s new file mode 100644 index 0000000..02a52bb --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s @@ -0,0 +1,23 @@ +// RUN: llvm-mc -triple=hexagon -mcpu=hexagonv75 -filetype=obj %s \ +// RUN: | llvm-objdump -d - \ +// RUN: | FileCheck %s + +foo: + { nop } + /// a nop without end-of-packet bits set to simulate data that is + /// not a proper packet end. + .long 0x7f004000 +bar: + { nop + nop + } + +// CHECK-LABEL: <foo>: +// CHECK: { nop } +// CHECK-NEXT: { nop + +/// The instruction starting after <bar> should start in a new packet. +// CHECK-LABEL: <bar>: +// CHECK: { nop +// CHECK-NEXT: nop } + |