diff options
author | Julian Brown <julian.brown@amd.com> | 2025-07-24 14:45:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-24 14:45:04 +0100 |
commit | 889faabe78d2cb182414430601d6da2ed6619be3 (patch) | |
tree | 3cc2bc690471f39dbcb93858d37604a5cb31bb85 /clang/lib | |
parent | 2149d6d1c9a1ad62761dc007ec6c6dedf6eb2fc0 (diff) | |
download | llvm-889faabe78d2cb182414430601d6da2ed6619be3.zip llvm-889faabe78d2cb182414430601d6da2ed6619be3.tar.gz llvm-889faabe78d2cb182414430601d6da2ed6619be3.tar.bz2 |
[OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs (#148947)
The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can
lead to redundant zero-sized mapping nodes at runtime. This patch fixes
it using a combination of approaches: trivially adjacent struct members
won't have a mapping node created between them, and for more complicated
cases (inheritance) the physical layout of the struct/class is used to
make sure that elements aren't missed.
I've introduced a new class to track the state whilst iterating over the
struct. This reduces a bit of redundancy in the code (accumulating
CombinedInfo both during and after the loop), which I think is a bit
neater.
Before:
omptarget --> Entry 0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown
omptarget --> Entry 1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry 5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown
After:
omptarget --> Entry 0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown
omptarget --> Entry 1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry 2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry 7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown
For code:
#include <cstdlib>
#include <cstdio>
struct S {
int x;
int y;
int z;
int *p1;
int *p2;
};
struct T : public S {
int a;
int b;
int c;
};
int main() {
T v;
v.p1 = (int*) calloc(8, sizeof(int));
v.p2 = (int*) calloc(8, sizeof(int));
#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
{
v.x++;
v.y += 2;
v.z += 3;
v.p1[0] += 4;
v.a += 7;
v.b += 5;
v.c += 6;
}
return 0;
}
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 157 |
1 files changed, 110 insertions, 47 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ce2dd4d..f1698a0 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7080,6 +7080,110 @@ private: return ConstLength.getSExtValue() != 1; } + /// A helper class to copy structures with overlapped elements, i.e. those + /// which have mappings of both "s" and "s.mem". Consecutive elements that + /// are not explicitly copied have mapping nodes synthesized for them, + /// taking care to avoid generating zero-sized copies. + class CopyOverlappedEntryGaps { + CodeGenFunction &CGF; + MapCombinedInfoTy &CombinedInfo; + OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE; + const ValueDecl *MapDecl = nullptr; + const Expr *MapExpr = nullptr; + Address BP = Address::invalid(); + bool IsNonContiguous = false; + uint64_t DimSize = 0; + // These elements track the position as the struct is iterated over + // (in order of increasing element address). + const RecordDecl *LastParent = nullptr; + uint64_t Cursor = 0; + unsigned LastIndex = -1u; + Address LB = Address::invalid(); + + public: + CopyOverlappedEntryGaps(CodeGenFunction &CGF, + MapCombinedInfoTy &CombinedInfo, + OpenMPOffloadMappingFlags Flags, + const ValueDecl *MapDecl, const Expr *MapExpr, + Address BP, Address LB, bool IsNonContiguous, + uint64_t DimSize) + : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl), + MapExpr(MapExpr), BP(BP), LB(LB), IsNonContiguous(IsNonContiguous), + DimSize(DimSize) {} + + void processField( + const OMPClauseMappableExprCommon::MappableComponent &MC, + const FieldDecl *FD, + llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)> + EmitMemberExprBase) { + const RecordDecl *RD = FD->getParent(); + const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); + uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex()); + uint64_t FieldSize = + CGF.getContext().getTypeSize(FD->getType().getCanonicalType()); + Address ComponentLB = Address::invalid(); + + if (FD->getType()->isLValueReferenceType()) { + const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + ComponentLB = + CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress(); + } else { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress(); + } + + if (!LastParent) + LastParent = RD; + if (FD->getParent() == LastParent) { + if (FD->getFieldIndex() != LastIndex + 1) + copyUntilField(FD, ComponentLB); + } else { + LastParent = FD->getParent(); + if (((int64_t)FieldOffset - (int64_t)Cursor) > 0) + copyUntilField(FD, ComponentLB); + } + Cursor = FieldOffset + FieldSize; + LastIndex = FD->getFieldIndex(); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); + } + + void copyUntilField(const FieldDecl *FD, Address ComponentLB) { + llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); + llvm::Value *LBPtr = LB.emitRawPointer(CGF); + llvm::Value *Size = + CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr); + copySizedChunk(LBPtr, Size); + } + + void copyUntilEnd(Address HB) { + if (LastParent) { + const ASTRecordLayout &RL = + CGF.getContext().getASTRecordLayout(LastParent); + if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor) + return; + } + llvm::Value *LBPtr = LB.emitRawPointer(CGF); + llvm::Value *Size = CGF.Builder.CreatePtrDiff( + CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), + LBPtr); + copySizedChunk(LBPtr, Size); + } + + void copySizedChunk(llvm::Value *Base, llvm::Value *Size) { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(Base); + CombinedInfo.Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.Types.push_back(Flags); + CombinedInfo.Mappers.push_back(nullptr); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1); + } + }; + /// Generate the base pointers, section pointers, sizes, map type bits, and /// user-defined mappers (all included in \a CombinedInfo) for the provided /// map type, map or motion modifiers, and expression components. @@ -7570,63 +7674,22 @@ private: getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, IsNonContiguous); - llvm::Value *Size = nullptr; + CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl, + MapExpr, BP, LB, IsNonContiguous, + DimSize); // Do bitcopy of all non-overlapped structure elements. for (OMPClauseMappableExprCommon::MappableExprComponentListRef Component : OverlappedElements) { - Address ComponentLB = Address::invalid(); for (const OMPClauseMappableExprCommon::MappableComponent &MC : Component) { if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { - const auto *FD = dyn_cast<FieldDecl>(VD); - if (FD && FD->getType()->isLValueReferenceType()) { - const auto *ME = - cast<MemberExpr>(MC.getAssociatedExpression()); - LValue BaseLVal = EmitMemberExprBase(CGF, ME); - ComponentLB = - CGF.EmitLValueForFieldInitialization(BaseLVal, FD) - .getAddress(); - } else { - ComponentLB = - CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + if (const auto *FD = dyn_cast<FieldDecl>(VD)) { + CopyGaps.processField(MC, FD, EmitMemberExprBase); } - llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); - llvm::Value *LBPtr = LB.emitRawPointer(CGF); - Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, - LBPtr); - break; } } - assert(Size && "Failed to determine structure size"); - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); - CombinedInfo.DevicePtrDecls.push_back(nullptr); - CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); - CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(Flags); - CombinedInfo.Mappers.push_back(nullptr); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); - LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); - CombinedInfo.DevicePtrDecls.push_back(nullptr); - CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); - CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); - llvm::Value *LBPtr = LB.emitRawPointer(CGF); - Size = CGF.Builder.CreatePtrDiff( - CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), - LBPtr); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(Flags); - CombinedInfo.Mappers.push_back(nullptr); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize - : 1); + CopyGaps.copyUntilEnd(HB); break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); |