diff options
4 files changed, 50 insertions, 19 deletions
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index c9ff713..c73648f 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -563,7 +563,7 @@ legalizeGetHighLowi64Bytes(Instruction &I, } static void -legalizeLoadStoreOnArrayAllocas(Instruction &I, +legalizeScalarLoadStoreOnArrays(Instruction &I, SmallVectorImpl<Instruction *> &ToRemove, DenseMap<Value *, Value *> &) { @@ -581,23 +581,31 @@ legalizeLoadStoreOnArrayAllocas(Instruction &I, } else return; - assert(LoadStoreTy->isSingleValueType() && - "Expected load/store type to be a single-valued type"); + // If the load/store is not of a single-value type (i.e., scalar or vector) + // then we do not modify it. It shouldn't be a vector either because the + // dxil-data-scalarization pass is expected to run before this, but it's not + // incorrect to apply this transformation to vector load/stores. + if (!LoadStoreTy->isSingleValueType()) + return; - auto *AllocaPtrOp = dyn_cast<AllocaInst>(PtrOp); - if (!AllocaPtrOp) + Type *ArrayTy; + if (auto *GlobalVarPtrOp = dyn_cast<GlobalVariable>(PtrOp)) + ArrayTy = GlobalVarPtrOp->getValueType(); + else if (auto *AllocaPtrOp = dyn_cast<AllocaInst>(PtrOp)) + ArrayTy = AllocaPtrOp->getAllocatedType(); + else return; - Type *Ty = AllocaPtrOp->getAllocatedType(); - if (!isa<ArrayType>(Ty)) + if (!isa<ArrayType>(ArrayTy)) return; - assert(!isa<ArrayType>(Ty->getArrayElementType()) && - "Expected allocated type of AllocaInst to be a flat ArrayType"); - IRBuilder<> Builder(&I); - Value *Zero = Builder.getInt32(0); - Value *GEP = Builder.CreateGEP(Ty, AllocaPtrOp, {Zero, Zero}, "", - GEPNoWrapFlags::all()); + assert(ArrayTy->getArrayElementType() == LoadStoreTy && + "Expected array element type to be the same as to the scalar load or " + "store type"); + + Value *Zero = ConstantInt::get(Type::getInt32Ty(I.getContext()), 0); + Value *GEP = GetElementPtrInst::Create( + ArrayTy, PtrOp, {Zero, Zero}, GEPNoWrapFlags::all(), "", I.getIterator()); I.setOperand(PtrOpIndex, GEP); } @@ -651,7 +659,7 @@ private: // downcastI64toI32InsertExtractElements needs to handle. LegalizationPipeline[Stage2].push_back( downcastI64toI32InsertExtractElements); - LegalizationPipeline[Stage2].push_back(legalizeLoadStoreOnArrayAllocas); + LegalizationPipeline[Stage2].push_back(legalizeScalarLoadStoreOnArrays); } }; diff --git a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll index b25b3de..c6789ac 100644 --- a/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll +++ b/llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll @@ -21,3 +21,21 @@ define void @store() { store i32 0, ptr %a, align 4
ret void
}
+
+@g = local_unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+define void @load_whole_global () {
+; CHECK-LABEL: define void @load_whole_global
+; CHECK-NEXT: load [4 x i32], ptr addrspace(3) @g, align 4
+; CHECK-NEXT: ret void
+ %l = load [4 x i32], ptr addrspace(3) @g, align 4
+ ret void
+}
+
+define void @load_global_index0 () {
+; CHECK-LABEL: define void @load_global_index0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @g, i32 0, i32 0
+; CHECK-NEXT: load i32, ptr addrspace(3) [[GEP]], align 4
+; CHECK-NEXT: ret void
+ %l = load i32, ptr addrspace(3) @g, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll index 27a8925..0c91c53 100644 --- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll +++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll @@ -24,7 +24,8 @@ define <4 x i32> @load_array_vec_test() #0 { ; CHECK-LABEL: define <4 x i32> @load_array_vec_test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4 @@ -52,7 +53,8 @@ define <4 x i32> @load_array_vec_test() #0 { define <4 x i32> @load_vec_test() #0 { ; CHECK-LABEL: define <4 x i32> @load_vec_test( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4 @@ -203,7 +205,8 @@ define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 { define <4 x i32> @multid_load_test() #0 { ; CHECK-LABEL: define <4 x i32> @multid_load_test( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4 diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll index a124c66..4394235 100644 --- a/llvm/test/CodeGen/DirectX/scalar-store.ll +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -14,7 +14,8 @@ ; CHECK-LABEL: store_array_vec_test define void @store_array_vec_test () local_unnamed_addr #0 { -; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0 +; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) [[GEP]], align 16 ; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4 ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8 ; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16 @@ -30,7 +31,8 @@ define void @store_array_vec_test () local_unnamed_addr #0 { ; CHECK-LABEL: store_vec_test define void @store_vec_test(<4 x i32> %inputVec) #0 { ; CHECK-NEXT: [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0 -; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0 +; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) [[GEP]], align 4 ; CHECK-NEXT: [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1 ; CHECK-NEXT: store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4 ; CHECK-NEXT: [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2 |