Diffstat (limited to 'clang/lib/CodeGen/TargetInfo.cpp'):
 clang/lib/CodeGen/TargetInfo.cpp | 207 ++++++++++++++++++++++++++++++++-----
 1 file changed, 190 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d2fcbea..44647a6 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7378,38 +7378,147 @@ public:
namespace {
class AMDGPUABIInfo final : public DefaultABIInfo {
+private:
+ static const unsigned MaxNumRegsForArgsRet = 16;
+
+ bool shouldReturnTypeInRegister(QualType Ty,
+ ASTContext &Context) const;
+ unsigned numRegsForType(QualType Ty) const;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+ bool isHomogeneousAggregateSmallEnough(const Type *Base,
+ uint64_t Members) const override;
+
public:
- explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+ explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
+ DefaultABIInfo(CGT) {}
-private:
- ABIArgInfo classifyArgumentType(QualType Ty) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
+bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+ return true;
+}
+
+bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
+ const Type *Base, uint64_t Members) const {
+ uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
+
+ // Homogeneous Aggregates may occupy at most 16 registers.
+ return Members * NumRegs <= MaxNumRegsForArgsRet;
+}
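
// Worked check of the budget above (a sketch, assuming 32-bit registers): a
// homogeneous aggregate of doubles costs (64 + 31) / 32 = 2 registers per
// member, so at most 8 members fit the 16-register cap; with float members
// the cost is 1 each, allowing up to 16.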
+
+/// Check whether the type is small enough to consider passing directly in
+/// registers.
+bool AMDGPUABIInfo::shouldReturnTypeInRegister(QualType Ty,
+ ASTContext &Ctx) const {
+ return ((Ctx.getTypeSize(Ty) + 31) / 32) <= MaxNumRegsForArgsRet;
+}
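
// Equivalently, any type of at most 16 * 32 = 512 bits (64 bytes) qualifies:
// for example, a 384-bit type needs ceil(384 / 32) = 12 registers and fits,
// while a 640-bit type needs 20 and does not.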
+
+/// Estimate number of registers the type will use when passed in registers.
+unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
+ unsigned NumRegs = 0;
+
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // Compute from the number of elements. The reported size is based on the
+ // in-memory size, which includes the padding 4th element for 3-vectors.
+ QualType EltTy = VT->getElementType();
+ unsigned EltSize = getContext().getTypeSize(EltTy);
+
+ // 16-bit element vectors should be passed as packed.
+ if (EltSize == 16)
+ return (VT->getNumElements() + 1) / 2;
+
+ unsigned EltNumRegs = (EltSize + 31) / 32;
+ return EltNumRegs * VT->getNumElements();
+ }
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ assert(!RD->hasFlexibleArrayMember());
+
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+ NumRegs += numRegsForType(FieldTy);
+ }
+
+ return NumRegs;
+ }
+
+ return (getContext().getTypeSize(Ty) + 31) / 32;
+}
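
// Worked sketch of the counting rules above (the type names are illustrative,
// not taken from the patch):
//
//   float                         -> (32 + 31) / 32 = 1 register
//   double                        -> (64 + 31) / 32 = 2 registers
//   float3 (3 x 32-bit)           -> 1 * 3 = 3 registers (not the padded
//                                    in-memory size of 4)
//   half3 (3 x 16-bit)            -> (3 + 1) / 2 = 2 packed registers
//   struct { float a; double b; } -> 1 + 2 = 3 registers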
+
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ llvm::CallingConv::ID CC = FI.getCallingConvention();
+
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- unsigned CC = FI.getCallingConvention();
- for (auto &Arg : FI.arguments())
- if (CC == llvm::CallingConv::AMDGPU_KERNEL)
- Arg.info = classifyArgumentType(Arg.type);
- else
- Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type);
+ unsigned NumRegsLeft = MaxNumRegsForArgsRet;
+ for (auto &Arg : FI.arguments()) {
+ if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
+ Arg.info = classifyKernelArgumentType(Arg.type);
+ } else {
+ Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
+ }
+ }
}
-/// \brief Classify argument of given type \p Ty.
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
- llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
- if (!StrTy) {
- return DefaultABIInfo::classifyArgumentType(Ty);
+ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
+ if (isAggregateTypeForABI(RetTy)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // returned by value.
+ if (!getRecordArgABI(RetTy, getCXXABI())) {
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just return a regular value.
+ if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ }
+
+ // Pack aggregates <= 8 bytes into single VGPR or pair.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ if (Size <= 64) {
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
+ return ABIArgInfo::getDirect();
+ }
}
+ // Otherwise just do the default thing.
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
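
// Worked sketch of the return-type thresholds above (struct shapes are
// illustrative; sizes are getTypeSize bit counts):
//
//   struct { char a; char b; } (16 bits)  -> direct as i16
//   struct { char a[3]; }      (24 bits)  -> direct as i32
//   struct { int a; int b; }   (64 bits)  -> direct as [2 x i32]
//   struct { int a[16]; }      (512 bits) -> 16 registers, returned direct
//   struct { int a[17]; }      (544 bits) -> 17 > 16 registers, falls back
//                                            to DefaultABIInfo (indirect)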
+
+/// For kernels, all parameters are really passed in a special buffer. It
+/// doesn't make sense to pass anything byval, so everything must be direct.
+ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // TODO: Can we omit empty structs?
+
// Coerce single-element structs to their element.
- if (StrTy->getNumElements() == 1) {
- return ABIArgInfo::getDirect();
- }
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
@@ -7417,6 +7526,70 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
// have to set it to false here. Other args of getDirect() are just defaults.
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
}
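
// Sketch of the kernel path (struct shapes illustrative): a kernel parameter
// of type struct { float x; } is coerced to a plain float by the
// single-element rule, while struct { float x; float y; } is kept as the
// aggregate itself, with CanBeFlattened = false so Clover sees one struct
// rather than two separate floats.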
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ unsigned &NumRegsLeft) const {
+ assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
+
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // passed by value.
+ if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just pass a regular value. TODO: We
+ // could do reasonable-size multiple-element structs too, using getExpand(),
+ // though watch out for things like bitfields.
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyArgumentType(Ty);
+ }
+
+ // Pack aggregates <= 8 bytes into single VGPR or pair.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ // XXX: Should this be i64 instead, and should the limit increase?
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (NumRegsLeft > 0) {
+ unsigned NumRegs = numRegsForType(Ty);
+ if (NumRegsLeft >= NumRegs) {
+ NumRegsLeft -= NumRegs;
+ return ABIArgInfo::getDirect();
+ }
+ }
+ }
+
+ // Otherwise just do the default thing.
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
+ if (!ArgInfo.isIndirect()) {
+ unsigned NumRegs = numRegsForType(Ty);
+ NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+ }
+
+ return ArgInfo;
+}
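
// Budget trace sketch for a non-kernel function (illustrative types, starting
// from the 16-register budget):
//
//   struct B12 { int v[12]; };  // 12 registers by numRegsForType
//   struct B4  { int v[4];  };  // 4 registers
//   void f(double a, B12 b, B4 c);
//     a: scalar, default path, direct     -> 2 used, 14 left
//     b: needs 12 <= 14, passed direct    -> 12 used, 2 left
//     c: needs 4 > 2 left, falls through
//        to DefaultABIInfo (indirect)     -> budget unchanged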
+
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)