; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -passes=newgvn,dce | FileCheck %s

; Analyze Load from clobbering Load.

define <vscale x 4 x i32> @load_store_clobber_load(ptr %p) {
; CHECK-LABEL: @load_store_clobber_load(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr undef, align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, ptr %p
  store <vscale x 4 x i32> zeroinitializer, ptr undef
  %load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @load_store_clobber_load_mayalias(ptr %p, ptr %p2) {
; CHECK-LABEL: @load_store_clobber_load_mayalias(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
;
  %load1 = load <vscale x 4 x i32>, ptr %p
  store <vscale x 4 x i32> zeroinitializer, ptr %p2
  %load2 = load <vscale x 4 x i32>, ptr %p
  %sub = sub <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @load_store_clobber_load_noalias(ptr noalias %p, ptr noalias %p2) {
; CHECK-LABEL: @load_store_clobber_load_noalias(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[P2:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, ptr %p
  store <vscale x 4 x i32> zeroinitializer, ptr %p2
  %load2 = load <vscale x 4 x i32>, ptr %p ; <- load to be eliminated
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

; BasicAA returns MayAlias for %gep1,%gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep1(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep1(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 1
; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
  %load1 = load i32, ptr %gep1
  %gep2 = getelementptr i32, ptr %p, i64 1
  %load2 = load i32, ptr %gep2 ; <- load could be eliminated
  %add = add i32 %load1, %load2
  ret i32 %add
}

define i32 @load_clobber_load_gep2(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep2(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P]], i64 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
  %load1 = load i32, ptr %gep1
  %gep2 = getelementptr i32, ptr %p, i64 4
  %load2 = load i32, ptr %gep2 ; <- cannot determine at compile time whether %load1 and %load2 are the same addr
  %add = add i32 %load1, %load2
  ret i32 %add
}

; TODO: BasicAA returns MayAlias for %gep1,%gep2; this could be improved to MustAlias.
define i32 @load_clobber_load_gep3(ptr %p) {
; CHECK-LABEL: @load_clobber_load_gep3(
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
  %load1 = load i32, ptr %gep1
  %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
  %load2 = load float, ptr %gep2 ; <- load could be eliminated
  %cast = bitcast float %load2 to i32
  %add = add i32 %load1, %cast
  ret i32 %add
}

define <vscale x 4 x i32> @load_clobber_load_fence(ptr %p) {
; CHECK-LABEL: @load_clobber_load_fence(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    call void asm "", "~{memory}"()
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
;
  %load1 = load <vscale x 4 x i32>, ptr %p
  call void asm "", "~{memory}"()
  %load2 = load <vscale x 4 x i32>, ptr %p
  %sub = sub <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @load_clobber_load_sideeffect(ptr %p) {
; CHECK-LABEL: @load_clobber_load_sideeffect(
; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
  %load1 = load <vscale x 4 x i32>, ptr %p
  call void asm sideeffect "", ""()
  %load2 = load <vscale x 4 x i32>, ptr %p
  %add = add <vscale x 4 x i32> %load1, %load2
  ret <vscale x 4 x i32> %add
}

; Analyze Load from clobbering Store.

define <vscale x 4 x i32> @store_forward_to_load(ptr %p) {
; CHECK-LABEL: @store_forward_to_load(
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
;
  store <vscale x 4 x i32> zeroinitializer, ptr %p
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @store_forward_to_load_sideeffect(ptr %p) {
; CHECK-LABEL: @store_forward_to_load_sideeffect(
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT:    call void asm sideeffect "", ""()
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> zeroinitializer, ptr %p
  call void asm sideeffect "", ""()
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define i32 @store_clobber_load() {
; CHECK-LABEL: @store_clobber_load(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x i32>, align 16
; CHECK-NEXT:    store <vscale x 4 x i32> undef, ptr [[ALLOC]], align 16
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[ALLOC]], i32 0, i32 1
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  %alloc = alloca <vscale x 4 x i32>
  store <vscale x 4 x i32> undef, ptr %alloc
  %ptr = getelementptr <vscale x 4 x i32>, ptr %alloc, i32 0, i32 1
  %load = load i32, ptr %ptr
  ret i32 %load
}

; Analyze Load from clobbering MemInst.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)

define i32 @memset_clobber_load(ptr %p) {
; CHECK-LABEL: @memset_clobber_load(
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    ret i32 16843009
;
  tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 5
  %load = load i32, ptr %gep
  ret i32 %load
}

define i32 @memset_clobber_load_vscaled_base(ptr %p) {
; CHECK-LABEL: @memset_clobber_load_vscaled_base(
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 1
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
  %load = load i32, ptr %gep
  ret i32 %load
}

define i32 @memset_clobber_load_nonconst_index(ptr %p, i64 %idx1, i64 %idx2) {
; CHECK-LABEL: @memset_clobber_load_nonconst_index(
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false)
  %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 %idx1, i64 %idx2
  %load = load i32, ptr %gep
  ret i32 %load
}

; Load elimination across BBs

define ptr @load_from_alloc_replaced_with_undef() {
; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[LOAD]], 0
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[A]], align 16
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret ptr [[A]]
;
entry:
  %a = alloca <vscale x 4 x i32>
  %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 0, i64 1
  %load = load i32, ptr %gep ; <- load to be eliminated
  %tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
  br i1 %tobool, label %if.end, label %if.then

if.then:
  store <vscale x 4 x i32> zeroinitializer, ptr %a
  br label %if.end

if.end:
  ret ptr %a
}

define i32 @redundant_load_elimination_1(ptr %p) {
; CHECK-LABEL: @redundant_load_elimination_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 [[LOAD1]]
;
entry:
  %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
  %load1 = load i32, ptr %gep
  %cmp = icmp eq i32 %load1, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:
  %load2 = load i32, ptr %gep ; <- load to be eliminated
  %add = add i32 %load1, %load2
  br label %if.end

if.end:
  %result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
  ret i32 %result
}

; TODO: BasicAA returns MayAlias for %gep1,%gep2; this could be improved to NoAlias.
define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
; CHECK-LABEL: @redundant_load_elimination_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 1
; CHECK-NEXT:    store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1, i64 0
; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT:    store i32 [[T]], ptr [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
  store i32 0, ptr %gep1
  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
  store i32 1, ptr %gep2
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load i32, ptr %gep1 ; <- load could be eliminated
  store i32 %t, ptr %q
  ret void

if.else:
  ret void
}

define void @redundant_load_elimination_zero_index(i1 %c, ptr %p, ptr %q) {
; CHECK-LABEL: @redundant_load_elimination_zero_index(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT:    store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 1
  store i32 0, ptr %gep1
  store i32 1, ptr %p
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load i32, ptr %gep1 ; <- load could be eliminated
  store i32 %t, ptr %q
  ret void

if.else:
  ret void
}

define void @redundant_load_elimination_zero_index_1(i1 %c, ptr %p, ptr %q, i64 %i) {
; CHECK-LABEL: @redundant_load_elimination_zero_index_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[J:%.*]] = add i64 [[I:%.*]], 1
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 0, i64 [[J]]
; CHECK-NEXT:    store i32 0, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 0, i64 [[I]]
; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  %j = add i64 %i, 1
  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %j
  store i32 0, ptr %gep1
  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 0, i64 %i
  store i32 1, ptr %gep2
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load i32, ptr %gep1 ; <- load could be eliminated
  store i32 %t, ptr %q
  ret void

if.else:
  ret void
}

; TODO: load in if.then could have been eliminated
define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32> %v) {
; CHECK-LABEL: @missing_load_elimination(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[P1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P]], i64 1
; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
; CHECK:       if.else:
; CHECK-NEXT:    ret void
;
entry:
  store <vscale x 4 x i32> zeroinitializer, ptr %p
  %p1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
  store <vscale x 4 x i32> %v, ptr %p1
  br i1 %c, label %if.else, label %if.then

if.then:
  %t = load <vscale x 4 x i32>, ptr %p ; load could be eliminated
  store <vscale x 4 x i32> %t, ptr %q
  ret void

if.else:
  ret void
}

; Different sizes / types

define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 4 x float>, ptr %p
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
; CHECK-NEXT:    store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
;
  store <vscale x 16 x i8> %x, ptr %p
  %load = load <vscale x 4 x float>, ptr %p
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x float> %x, ptr %p
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x i64> %x, ptr %p
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
; CHECK-NEXT:    ret <vscale x 4 x i64> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 4 x i64>, ptr %p
  ret <vscale x 4 x i64> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 2 x i32>, ptr %p
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
  %load = load <vscale x 2 x i32>, ptr %q
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %q = getelementptr <2 x i32>, ptr %p, i64 1
  %load = load <vscale x 2 x i32>, ptr %q
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 2 x ptr> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 2 x ptr>, ptr %p
  ret <vscale x 2 x ptr> %load
}

define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
; CHECK-NEXT:    store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 2 x i64> [[LOAD]]
;
  store <vscale x 2 x ptr> %x, ptr %p
  %load = load <vscale x 2 x i64>, ptr %p
  ret <vscale x 2 x i64> %load
}

define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
; CHECK-NEXT:    store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
;
  store <4 x i32> %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <16 x i8>, ptr %p
  ret <16 x i8> %load
}

define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> splat (i32 4), ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @load_v16i8_struct_store_v4i32_forward_load(ptr %p, { <vscale x 4 x i32> } %x) {
; CHECK-LABEL: @load_v16i8_struct_store_v4i32_forward_load(
; CHECK-NEXT:    store { <vscale x 4 x i32> } [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
;
  store { <vscale x 4 x i32> } %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define { <vscale x 16 x i8> } @load_v16i8_store_v4i32_struct_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_struct_forward_load(
; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load { <vscale x 16 x i8> }, ptr [[P]], align 16
; CHECK-NEXT:    ret { <vscale x 16 x i8> } [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load { <vscale x 16 x i8> }, ptr %p
  ret { <vscale x 16 x i8> } %load
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
; CHECK-LABEL: @bigexample(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP]])
; CHECK-NEXT:    [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT:    [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
; CHECK-NEXT:    [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP0]], 5
; CHECK-NEXT:    [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
; CHECK-NEXT:    [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP0]], 48
; CHECK-NEXT:    [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
; CHECK-NEXT:    [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT:    [[DOTUNPACK:%.*]] = load <vscale x 4 x i32>, ptr [[REF_TMP]], align 16
; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[DOTUNPACK]], 0
; CHECK-NEXT:    [[DOTUNPACK8:%.*]] = load <vscale x 4 x i32>, ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP6]], <vscale x 4 x i32> [[DOTUNPACK8]], 1
; CHECK-NEXT:    [[DOTUNPACK10:%.*]] = load <vscale x 4 x i32>, ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]], <vscale x 4 x i32> [[DOTUNPACK10]], 2
; CHECK-NEXT:    [[DOTUNPACK12:%.*]] = load <vscale x 4 x i32>, ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP12]], <vscale x 4 x i32> [[DOTUNPACK12]], 3
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP]])
; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP15]]
;
entry:
  %ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
  call void @llvm.lifetime.start.p0(ptr nonnull %ref.tmp)
  %a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
  store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 4
  %ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
  %a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
  store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 5
  %ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
  %a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
  store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
  %4 = call i64 @llvm.vscale.i64()
  %5 = mul i64 %4, 48
  %ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
  %a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
  store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
  %.unpack = load <vscale x 4 x i32>, ptr %ref.tmp, align 16
  %6 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> %.unpack, 0
  %7 = call i64 @llvm.vscale.i64()
  %8 = shl i64 %7, 4
  %.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
  %.unpack8 = load <vscale x 4 x i32>, ptr %.elt7, align 16
  %9 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %6, <vscale x 4 x i32> %.unpack8, 1
  %10 = call i64 @llvm.vscale.i64()
  %11 = shl i64 %10, 5
  %.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
  %.unpack10 = load <vscale x 4 x i32>, ptr %.elt9, align 16
  %12 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %9, <vscale x 4 x i32> %.unpack10, 2
  %13 = call i64 @llvm.vscale.i64()
  %14 = mul i64 %13, 48
  %.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
  %.unpack12 = load <vscale x 4 x i32>, ptr %.elt11, align 16
  %15 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %12, <vscale x 4 x i32> %.unpack12, 3
  call void @llvm.lifetime.end.p0(ptr nonnull %ref.tmp)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %15
}

define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

; Here, only the lower bound for the vscale is known, but this is enough to allow a forward to a load of 16 elements.
define <vscale x 4 x float> @scalable_store_to_fixed_load_only_lower_bound(<vscale x 4 x float> %a) vscale_range(4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float> }, align 16
; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[RETVAL]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <vscale x 4 x float> }
  store <vscale x 4 x float> %a, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTR:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <32 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %gep = getelementptr inbounds i8, ptr %ptr, i64 8
  %1 = load <16 x float>, ptr %gep
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <32 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <32 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTR:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <16 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <16 x i32>, ptr %ptr
  %cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
  ret <vscale x 4 x i32> %cast.scalable
}

; This function does not have a fixed vscale, but the loaded vector is still known
; to be smaller than or equal in size to the stored vector.
define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
; CHECK-LABEL: @scalable_store_to_small_fixed_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
; CHECK-NEXT:    ret <4 x float> [[TMP0]]
;
entry:
  %ptr = alloca <vscale x 4 x float>
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <4 x float>, ptr %ptr
  ret <4 x float> %1
}