diff options
author | Changpeng Fang <changpeng.fang@amd.com> | 2023-08-11 16:37:42 -0700 |
---|---|---|
committer | Changpeng Fang <changpeng.fang@amd.com> | 2023-08-11 16:37:42 -0700 |
commit | d77c62053c944652846c00a35c921e14b43b1877 (patch) | |
tree | d781bf3cc238972f60ace088449923b21c39850a /clang/lib/CodeGen/CGCall.cpp | |
parent | 9e3d9c9eae03910d93e2312e1e0845433c779998 (diff) | |
download | llvm-d77c62053c944652846c00a35c921e14b43b1877.zip llvm-d77c62053c944652846c00a35c921e14b43b1877.tar.gz llvm-d77c62053c944652846c00a35c921e14b43b1877.tar.bz2 |
[clang][AMDGPU]: Don't use byval for struct arguments in function ABI
Summary:
Byval requires allocating additional stack space and always requires an implicit copy to be inserted in codegen,
where it can be difficult to optimize. In this work, we use the byref/IndirectAliased promotion method instead of
byval, which carries implicit-copy semantics.
Reviewers:
arsenm
Differential Revision:
https://reviews.llvm.org/D155986
Diffstat (limited to 'clang/lib/CodeGen/CGCall.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGCall.cpp | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 28c3bc7c..2b5121a 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2156,7 +2156,8 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types, const llvm::DataLayout &DL, const ABIArgInfo &AI, bool CheckCoerce = true) { llvm::Type *Ty = Types.ConvertTypeForMem(QTy); - if (AI.getKind() == ABIArgInfo::Indirect) + if (AI.getKind() == ABIArgInfo::Indirect || + AI.getKind() == ABIArgInfo::IndirectAliased) return true; if (AI.getKind() == ABIArgInfo::Extend) return true; @@ -5126,12 +5127,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); - if (!ArgInfo.getIndirectByVal() || + bool isByValOrRef = + ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal(); + + if (!isByValOrRef || (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) { NeedCopy = true; } if (!getLangOpts().OpenCL) { - if ((ArgInfo.getIndirectByVal() && + if ((isByValOrRef && (AS != LangAS::Default && AS != CGM.getASTAllocaAddressSpace()))) { NeedCopy = true; @@ -5139,7 +5143,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } // For OpenCL even if RV is located in default or alloca address space // we don't want to perform address space cast for it. - else if ((ArgInfo.getIndirectByVal() && + else if ((isByValOrRef && Addr.getType()->getAddressSpace() != IRFuncTy-> getParamType(FirstIRArg)->getPointerAddressSpace())) { NeedCopy = true; |