; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=gvn -S < %s | FileCheck %s

; Check that in both @f0 and @f1 the second load is recognized as redundant
; and is removed.

; The two masked loads are identical, so GVN replaces the second one with the
; first; the add then uses the same value for both operands.
define <128 x i8> @f0(ptr %a0, <128 x i8> %a1, <128 x i8> %a2) {
; CHECK-LABEL: @f0(
; CHECK-NEXT:    [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]]
; CHECK-NEXT:    [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr align 4 [[A0:%.*]], <128 x i1> [[V0]], <128 x i8> undef)
; CHECK-NEXT:    [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]]
; CHECK-NEXT:    ret <128 x i8> [[V3]]
;
  %v0 = icmp eq <128 x i8> %a1, %a2
  %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef)
  %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef)
  %v3 = add <128 x i8> %v1, %v2
  ret <128 x i8> %v3
}

; The intervening masked store writes one whole <128 x i8> past %a0, so it
; cannot alias the loaded range and the second load is still redundant.
define <128 x i8> @f1(ptr %a0, <128 x i8> %a1, <128 x i8> %a2) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:    [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]]
; CHECK-NEXT:    [[V1:%.*]] = getelementptr <128 x i8>, ptr [[A0:%.*]], i32 1
; CHECK-NEXT:    [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr align 4 [[A0]], <128 x i1> [[V0]], <128 x i8> undef)
; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0(<128 x i8> [[A2]], ptr align 4 [[V1]], <128 x i1> [[V0]])
; CHECK-NEXT:    [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]]
; CHECK-NEXT:    ret <128 x i8> [[V4]]
;
  %v0 = icmp eq <128 x i8> %a1, %a2
  %v1 = getelementptr <128 x i8>, ptr %a0, i32 1
  %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef)
  call void @llvm.masked.store.v128i8.p0(<128 x i8> %a2, ptr %v1, i32 4, <128 x i1> %v0)
  %v3 = call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %a0, i32 4, <128 x i1> %v0, <128 x i8> undef)
  %v4 = add <128 x i8> %v2, %v3
  ret <128 x i8> %v4
}

; The lane mask folds to an all-true splat, so the load from %1 reads back
; exactly what the store wrote and is replaced by the stored value directly.
define <4 x float> @forward_masked_load(ptr %0, ptr %1) {
; CHECK-LABEL: @forward_masked_load(
; CHECK-NEXT:    [[LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer)
; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <4 x i1> splat (i1 true))
; CHECK-NEXT:    ret <4 x float> [[LOAD1]]
;
  %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
  %load1 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  call void @llvm.masked.store.v4f32.p0(<4 x float> %load1, ptr %1, i32 1, <4 x i1> %mask)
  %load2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  ret <4 x float> %load2
}

; With an arbitrary mask the forwarded value must be selected against the
; reload's passthrough (zeroinitializer) for the inactive lanes.
define <4 x float> @forward_masked_load_arbitrary_mask(ptr %loc_a, ptr %loc_b, <4 x i1> %mask) {
; CHECK-LABEL: @forward_masked_load_arbitrary_mask(
; CHECK-NEXT:    [[LOAD1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[LOC_A:%.*]], <4 x i1> [[MASK:%.*]], <4 x float> zeroinitializer)
; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD1]], ptr align 1 [[LOC_B:%.*]], <4 x i1> [[MASK]])
; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[MASK]], <4 x float> [[LOAD1]], <4 x float> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %load1 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %loc_a, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  call void @llvm.masked.store.v4f32.p0(<4 x float> %load1, ptr %loc_b, i32 1, <4 x i1> %mask)
  %load2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %loc_b, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  ret <4 x float> %load2
}

; The stored fmul result is forwarded to the final load; the all-true splat
; mask means no select is needed.
define <4 x float> @forward_binop_splat_i1_mask(ptr %0, ptr %1) {
; CHECK-LABEL: @forward_binop_splat_i1_mask(
; CHECK-NEXT:    [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer)
; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_16]], <4 x i1> splat (i1 true), <4 x float> zeroinitializer)
; CHECK-NEXT:    [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]]
; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <4 x i1> splat (i1 true))
; CHECK-NEXT:    ret <4 x float> [[FMUL]]
;
  %mask = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
  %load.0.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  %gep.0.16 = getelementptr i8, ptr %0, i32 16
  %load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  %fmul = fmul <4 x float> %load.0.0, %load.0.16
  call void @llvm.masked.store.v4f32.p0(<4 x float> %fmul, ptr %1, i32 1, <4 x i1> %mask)
  %load.1.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  ret <4 x float> %load.1.0
}

; With a variable lane mask the forwarded fmul result is selected against the
; reload's passthrough value.
define <4 x float> @forward_binop_with_sel(ptr %0, ptr %1, i32 %a, i32 %b, <4 x float> %passthrough) {
; CHECK-LABEL: @forward_binop_with_sel(
; CHECK-NEXT:    [[MASK:%.*]] = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[A:%.*]], i32 [[B:%.*]])
; CHECK-NEXT:    [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[TMP0:%.*]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 [[GEP_0_16]], <4 x i1> [[MASK]], <4 x float> zeroinitializer)
; CHECK-NEXT:    [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]]
; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <4 x i1> [[MASK]])
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[MASK]], <4 x float> [[FMUL]], <4 x float> [[PASSTHROUGH:%.*]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %mask = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %a, i32 %b)
  %load.0.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  %gep.0.16 = getelementptr i8, ptr %0, i32 16
  %load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
  %fmul = fmul <4 x float> %load.0.0, %load.0.16
  call void @llvm.masked.store.v4f32.p0(<4 x float> %fmul, ptr %1, i32 1, <4 x i1> %mask)
  %load.1.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> %passthrough)
  ret <4 x float> %load.1.0
}

; Forwarding also works for scalable vectors: the reload becomes a select of
; the stored value against the passthrough.
define <vscale x 4 x float> @forward_masked_load_scalable(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @forward_masked_load_scalable(
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[PASSTHROUGH:%.*]])
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[LOAD1]], <vscale x 4 x float> [[PASSTHROUGH]]
; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP5]]
;
  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load1 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough)
  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask)
  %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float> %load2
}

; The stored value is <vscale x 4 x double> but the reload wants
; <vscale x 4 x float>, so no forwarding happens and the second load remains.
define <vscale x 4 x float> @forward_masked_load_scalable_type_mismatch(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @forward_masked_load_scalable_type_mismatch(
; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD1:%.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x double> zeroinitializer)
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    [[LOAD2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> [[PASSTHROUGH:%.*]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD2]]
;
  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load1 = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x double> zeroinitializer)
  call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask)
  %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float> %load2
}

; Even though the first load's passthrough (zeroinitializer) differs from the
; reload's, forwarding still applies: the reload becomes a select with its own
; passthrough.
define <vscale x 4 x float> @generate_sel_with_passthrough(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @generate_sel_with_passthrough(
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD1]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x float> [[LOAD1]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]
; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP5]]
;
  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load1 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load1, ptr %1, i32 1, <vscale x 4 x i1> %mask)
  %load2 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float> %load2
}

; Scalable version of @forward_binop_with_sel: the stored fmul is forwarded
; and selected against the reload's passthrough.
define <vscale x 4 x float> @forward_binop_with_sel_scalable(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @forward_binop_with_sel_scalable(
; CHECK-NEXT:    [[MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    [[TMP3:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> [[FMUL]], <vscale x 4 x float> [[PASSTHROUGH:%.*]]
; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
;
  %mask = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
  %gep.0.16 = getelementptr i8, ptr %0, i32 16
  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
  %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16
  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask)
  %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float> %load.1.0
}

; The reload's mask differs from the store's mask, so the load is not
; forwarded.
define <vscale x 4 x float> @load_mask_differs(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @load_mask_differs(
; CHECK-NEXT:    [[MASK0:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
; CHECK-NEXT:    [[MASK1:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK0]])
; CHECK-NEXT:    [[LOAD_1_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK1]], <vscale x 4 x float> [[PASSTHROUGH:%.*]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD_1_0]]
;
  %mask0 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
  %mask1 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer)
  %gep.0.16 = getelementptr i8, ptr %0, i32 16
  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer)
  %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16
  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask0)
  %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask1, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float> %load.1.0
}

; The store's mask differs from the reload's mask, so again no forwarding.
define <vscale x 4 x float> @store_mask_differs(ptr %0, ptr %1, <vscale x 4 x float> %passthrough) {
; CHECK-LABEL: @store_mask_differs(
; CHECK-NEXT:    [[MASK0:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
; CHECK-NEXT:    [[MASK1:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
; CHECK-NEXT:    [[LOAD_0_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP0:%.*]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[GEP_0_16]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> zeroinitializer)
; CHECK-NEXT:    [[FMUL:%.*]] = fmul <vscale x 4 x float> [[LOAD_0_0]], [[LOAD_0_16]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[FMUL]], ptr align 1 [[TMP1:%.*]], <vscale x 4 x i1> [[MASK1]])
; CHECK-NEXT:    [[LOAD_1_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 1 [[TMP1]], <vscale x 4 x i1> [[MASK0]], <vscale x 4 x float> [[PASSTHROUGH:%.*]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD_1_0]]
;
  %mask0 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
  %mask1 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
  %load.0.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %0, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer)
  %gep.0.16 = getelementptr i8, ptr %0, i32 16
  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %gep.0.16, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> zeroinitializer)
  %fmul = fmul <vscale x 4 x float> %load.0.0, %load.0.16
  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %fmul, ptr %1, i32 1, <vscale x 4 x i1> %mask1)
  %load.1.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %1, i32 1, <vscale x 4 x i1> %mask0, <vscale x 4 x float> %passthrough)
  ret <vscale x 4 x float>  %load.1.0
}
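
; Intrinsic declarations for the fixed-width and scalable variants exercised
; above. NOTE: these are assumed optional on recent LLVM, where the textual
; parser auto-declares intrinsics; they use the legacy i32 alignment operand
; to match the call sites, which auto-upgrade folds into the `align` pointer
; attribute seen in the CHECK lines.
declare <128 x i8> @llvm.masked.load.v128i8.p0(ptr, i32, <128 x i1>, <128 x i8>)
declare void @llvm.masked.store.v128i8.p0(<128 x i8>, ptr, i32, <128 x i1>)
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>)
declare <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
declare void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double>, ptr, i32, <vscale x 4 x i1>)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)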