diff options
-rw-r--r-- | clang/docs/ReleaseNotes.rst | 3 | ||||
-rw-r--r-- | clang/include/clang/Basic/Attr.td | 12 | ||||
-rw-r--r-- | clang/include/clang/Basic/AttrDocs.td | 66 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGCoroutine.cpp | 4 | ||||
-rw-r--r-- | clang/test/CodeGenCoroutines/coro-only-destroy-when-complete.cpp | 59 | ||||
-rw-r--r-- | clang/test/Misc/pragma-attribute-supported-attributes-list.test | 1 | ||||
-rw-r--r-- | llvm/docs/Coroutines.rst | 11 | ||||
-rw-r--r-- | llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 | ||||
-rw-r--r-- | llvm/include/llvm/IR/Attributes.td | 3 | ||||
-rw-r--r-- | llvm/include/llvm/IR/Function.h | 7 | ||||
-rw-r--r-- | llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 | ||||
-rw-r--r-- | llvm/test/Transforms/Coroutines/coro-only-destroy-when-complete.ll | 137 |
15 files changed, 323 insertions, 7 deletions
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8bac599..7a131cb 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -296,6 +296,9 @@ Attribute Changes in Clang is ignored, changed from the former incorrect suggestion to move it past declaration specifiers. (`#58637 <https://github.com/llvm/llvm-project/issues/58637>`_) +- Clang now introduces the ``[[clang::coro_only_destroy_when_complete]]`` attribute + to reduce the size of the destroy functions for coroutines which are known to + be destroyed only after having reached the final suspend point. Improvements to Clang's diagnostics ----------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 60b5499..3143456 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1082,6 +1082,18 @@ def CFConsumed : InheritableParamAttr { let Documentation = [RetainBehaviorDocs]; } + +// coro_only_destroy_when_complete indicates the coroutines whose return type +// is marked by coro_only_destroy_when_complete can only be destroyed when the +// coroutine completes. Then the space for the destroy functions can be saved. +def CoroOnlyDestroyWhenComplete : InheritableAttr { + let Spellings = [Clang<"coro_only_destroy_when_complete">]; + let Subjects = SubjectList<[CXXRecord]>; + let LangOpts = [CPlusPlus]; + let Documentation = [CoroOnlyDestroyWhenCompleteDocs]; + let SimpleHandler = 1; +} + // OSObject-based attributes. 
def OSConsumed : InheritableParamAttr { let Spellings = [Clang<"os_consumed">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 05703df..fa6f6ac 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -7416,3 +7416,69 @@ that ``p->array`` must have at least ``p->count`` number of elements available: }]; } + +def CoroOnlyDestroyWhenCompleteDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +The `coro_only_destroy_when_complete` attribute should be marked on a C++ class. The coroutines +whose return type is marked with the attribute are assumed to be destroyed only after the coroutine has +reached the final suspend point. + +This is helpful for the optimizers to reduce the size of the destroy function for the coroutines. + +For example, + +.. code-block:: c++ + + A foo() { + dtor d; + co_await something(); + dtor d1; + co_await something(); + dtor d2; + co_return 43; + } + +The compiler may generate the following pseudocode: + +.. code-block:: c++ + + void foo.destroy(foo.Frame *frame) { + switch(frame->suspend_index()) { + case 1: + frame->d.~dtor(); + break; + case 2: + frame->d.~dtor(); + frame->d1.~dtor(); + break; + case 3: + frame->d.~dtor(); + frame->d1.~dtor(); + frame->d2.~dtor(); + break; + default: // coroutine completed or hasn't started + break; + } + + frame->promise.~promise_type(); + delete frame; + } + +The `foo.destroy()` function's purpose is to release all of the resources +initialized for the coroutine when it is destroyed in a suspended state. +However, if the coroutine is only ever destroyed at the final suspend state, +the rest of the conditions are superfluous. + +The user can use the `coro_only_destroy_when_complete` attribute to suppress +generation of the other destruction cases, optimizing the above `foo.destroy` to: + +.. 
code-block:: c++ + + void foo.destroy(foo.Frame *frame) { + frame->promise.~promise_type(); + delete frame; + } + + }]; +} diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 5831021..7e449d5 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -777,6 +777,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // LLVM require the frontend to mark the coroutine. CurFn->setPresplitCoroutine(); + + if (CXXRecordDecl *RD = FnRetTy->getAsCXXRecordDecl(); + RD && RD->hasAttr<CoroOnlyDestroyWhenCompleteAttr>()) + CurFn->setCoroDestroyOnlyWhenComplete(); } // Emit coroutine intrinsic and patch up arguments of the token type. diff --git a/clang/test/CodeGenCoroutines/coro-only-destroy-when-complete.cpp b/clang/test/CodeGenCoroutines/coro-only-destroy-when-complete.cpp new file mode 100644 index 0000000..9d4ae85 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-only-destroy-when-complete.cpp @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 \ +// RUN: -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 \ +// RUN: -O3 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O + +#include "Inputs/coroutine.h" + +using namespace std; + +struct A; +struct A_promise_type { + A get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_value(int); + void unhandled_exception(); + + std::coroutine_handle<> handle; +}; + +struct Awaitable{ + bool await_ready(); + int await_resume(); + template <typename F> + void await_suspend(F); +}; +Awaitable something(); + +struct dtor { + dtor(); + ~dtor(); +}; + +struct [[clang::coro_only_destroy_when_complete]] A { + using promise_type = A_promise_type; + A(); + A(std::coroutine_handle<>); + ~A(); + + std::coroutine_handle<promise_type> handle; +}; + +A foo() { + dtor d; + co_await something(); + 
dtor d1; + co_await something(); + dtor d2; + co_return 43; +} + +// CHECK: define{{.*}}@_Z3foov({{.*}}) #[[ATTR_NUM:[0-9]+]] +// CHECK: attributes #[[ATTR_NUM]] = {{.*}}coro_only_destroy_when_complete + +// CHECK-O: define{{.*}}@_Z3foov.destroy +// CHECK-O: {{^.*}}: +// CHECK-O-NOT: br +// CHECK-O: ret void diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index f481267..9697940 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -56,6 +56,7 @@ // CHECK-NEXT: ConsumableAutoCast (SubjectMatchRule_record) // CHECK-NEXT: ConsumableSetOnRead (SubjectMatchRule_record) // CHECK-NEXT: Convergent (SubjectMatchRule_function) +// CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record) // CHECK-NEXT: CountedBy (SubjectMatchRule_field) // CHECK-NEXT: DLLExport (SubjectMatchRule_function, SubjectMatchRule_variable, SubjectMatchRule_record, SubjectMatchRule_objc_interface) // CHECK-NEXT: DLLImport (SubjectMatchRule_function, SubjectMatchRule_variable, SubjectMatchRule_record, SubjectMatchRule_objc_interface) diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst index f4a2781..d6219d2 100644 --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -1775,6 +1775,17 @@ CoroCleanup This pass runs late to lower all coroutine related intrinsics not replaced by earlier passes. +Attributes +========== + +coro_only_destroy_when_complete +------------------------------- + +When a coroutine is marked with coro_only_destroy_when_complete, it indicates +the coroutine must have reached the final suspend point when it gets destroyed. + +This attribute only works for switched-resume coroutines now. 
+ Metadata ======== diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 2a522c5..9fa70c0 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -718,6 +718,7 @@ enum AttributeKindCodes { ATTR_KIND_NOFPCLASS = 87, ATTR_KIND_OPTIMIZE_FOR_DEBUGGING = 88, ATTR_KIND_WRITABLE = 89, + ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE = 90, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 693ffbb..fc38e68 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -318,6 +318,9 @@ def MustProgress : EnumAttr<"mustprogress", [FnAttr]>; /// Function is a presplit coroutine. def PresplitCoroutine : EnumAttr<"presplitcoroutine", [FnAttr]>; +/// The coroutine would only be destroyed when it is complete. +def CoroDestroyOnlyWhenComplete : EnumAttr<"coro_only_destroy_when_complete", [FnAttr]>; + /// Target-independent string attributes. 
def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 1628aaf..955a4fc 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -506,6 +506,13 @@ public: void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); } void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); } + bool isCoroOnlyDestroyWhenComplete() const { + return hasFnAttribute(Attribute::CoroDestroyOnlyWhenComplete); + } + void setCoroDestroyOnlyWhenComplete() { + addFnAttr(Attribute::CoroDestroyOnlyWhenComplete); + } + MemoryEffects getMemoryEffects() const; void setMemoryEffects(MemoryEffects ME); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 1c92da5a..76431e8 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2063,6 +2063,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::PresplitCoroutine; case bitc::ATTR_KIND_WRITABLE: return Attribute::Writable; + case bitc::ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE: + return Attribute::CoroDestroyOnlyWhenComplete; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 3a17ee1..d16b5c7 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -826,6 +826,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_PRESPLIT_COROUTINE; case Attribute::Writable: return bitc::ATTR_KIND_WRITABLE; + case Attribute::CoroDestroyOnlyWhenComplete: + return bitc::ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index c7bc104..1a46e8c 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -529,13 +529,20 @@ void CoroCloner::handleFinalSuspend() { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); Builder.SetInsertPoint(OldSwitchBB->getTerminator()); - auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr, - coro::Shape::SwitchFieldIndex::Resume, - "ResumeFn.addr"); - auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), - GepIndex); - auto *Cond = Builder.CreateIsNull(Load); - Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); + + if (NewF->isCoroOnlyDestroyWhenComplete()) { + // When the coroutine can only be destroyed when complete, we don't need + // to generate code for other cases. + Builder.CreateBr(ResumeBB); + } else { + auto *GepIndex = Builder.CreateStructGEP( + Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); + auto *Load = + Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex); + auto *Cond = Builder.CreateIsNull(Load); + Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); + } OldSwitchBB->getTerminator()->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 6500854..4057531 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -922,6 +922,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::PresplitCoroutine: case Attribute::Memory: case Attribute::NoFPClass: + case Attribute::CoroDestroyOnlyWhenComplete: continue; // Those attributes should be safe to propagate to the extracted function. 
case Attribute::AlwaysInline: diff --git a/llvm/test/Transforms/Coroutines/coro-only-destroy-when-complete.ll b/llvm/test/Transforms/Coroutines/coro-only-destroy-when-complete.ll new file mode 100644 index 0000000..3f0899a --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-only-destroy-when-complete.ll @@ -0,0 +1,137 @@ +; RUN: opt < %s -passes='cgscc(coro-split),early-cse,dce,simplifycfg' -S | FileCheck %s + +%"struct.std::__n4861::noop_coroutine_promise" = type { i8 } +%struct.Promise = type { %"struct.std::__n4861::coroutine_handle" } +%"struct.std::__n4861::coroutine_handle" = type { ptr } + +define ptr @foo() #1 { +entry: + %__promise = alloca %struct.Promise, align 8 + %0 = call token @llvm.coro.id(i32 16, ptr nonnull %__promise, ptr nonnull @foo, ptr null) + %1 = call i1 @llvm.coro.alloc(token %0) + br i1 %1, label %coro.alloc, label %init.suspend + +coro.alloc: ; preds = %entry + %2 = tail call i64 @llvm.coro.size.i64() + %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef %2) #11 + br label %init.suspend + +init.suspend: ; preds = %entry, %coro.alloc + %3 = phi ptr [ null, %entry ], [ %call, %coro.alloc ] + %4 = call ptr @llvm.coro.begin(token %0, ptr %3) #12 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %__promise) #3 + store ptr null, ptr %__promise, align 8 + %5 = call token @llvm.coro.save(ptr null) + %6 = call i8 @llvm.coro.suspend(token %5, i1 false) + switch i8 %6, label %coro.ret [ + i8 0, label %await.suspend + i8 1, label %cleanup1 + ] + +await.suspend: ; preds = %init.suspend + %7 = call token @llvm.coro.save(ptr null) + %8 = call i8 @llvm.coro.suspend(token %7, i1 false) + switch i8 %8, label %coro.ret [ + i8 0, label %await2.suspend + i8 1, label %cleanup2 + ] + +await2.suspend: ; preds = %await.suspend + %call27 = call ptr @_Z5Innerv() #3 + %9 = call token @llvm.coro.save(ptr null) + %10 = getelementptr inbounds i8, ptr %__promise, i64 -16 + store ptr %10, ptr %call27, align 8 + %11 = getelementptr inbounds i8, ptr 
%call27, i64 -16 + %12 = call ptr @llvm.coro.subfn.addr(ptr nonnull %11, i8 0) + call fastcc void %12(ptr nonnull %11) #3 + %13 = call i8 @llvm.coro.suspend(token %9, i1 false) + switch i8 %13, label %coro.ret [ + i8 0, label %final.suspend + i8 1, label %cleanup3 + ] + +final.suspend: ; preds = %await2.suspend + %14 = call ptr @llvm.coro.subfn.addr(ptr nonnull %11, i8 1) + call fastcc void %14(ptr nonnull %11) #3 + %15 = call token @llvm.coro.save(ptr null) + %retval.sroa.0.0.copyload.i = load ptr, ptr %__promise, align 8 + %16 = call ptr @llvm.coro.subfn.addr(ptr %retval.sroa.0.0.copyload.i, i8 0) + call fastcc void %16(ptr %retval.sroa.0.0.copyload.i) #3 + %17 = call i8 @llvm.coro.suspend(token %15, i1 true) #12 + switch i8 %17, label %coro.ret [ + i8 0, label %final.ready + i8 1, label %cleanup62 + ] + +final.ready: ; preds = %final.suspend + call void @exit(i32 noundef 1) + unreachable + +cleanup1: + call void @dtor1() + br label %cleanup62 + +cleanup2: + call void @dtor2() + br label %cleanup62 + +cleanup3: + call void @dtor3() + br label %cleanup62 + +cleanup62: ; preds = %await2.suspend, %await.suspend, %init.suspend, %final.suspend + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %__promise) #3 + %18 = call ptr @llvm.coro.free(token %0, ptr %4) + %.not = icmp eq ptr %18, null + br i1 %.not, label %coro.ret, label %coro.free + +coro.free: ; preds = %cleanup62 + call void @_ZdlPv(ptr noundef nonnull %18) #3 + br label %coro.ret + +coro.ret: ; preds = %coro.free, %cleanup62, %final.suspend, %await2.suspend, %await.suspend, %init.suspend + %19 = call i1 @llvm.coro.end(ptr null, i1 false, token none) #12 + ret ptr %__promise +} + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #2 +declare i1 @llvm.coro.alloc(token) #3 +declare dso_local noundef nonnull ptr @_Znwm(i64 noundef) local_unnamed_addr #4 +declare i64 @llvm.coro.size.i64() #5 +declare ptr @llvm.coro.begin(token, ptr writeonly) #3 +declare void 
@llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6 +declare token @llvm.coro.save(ptr) #7 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6 +declare i8 @llvm.coro.suspend(token, i1) #3 +declare ptr @_Z5Innerv() local_unnamed_addr +declare dso_local void @_ZdlPv(ptr noundef) local_unnamed_addr #8 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #2 +declare i1 @llvm.coro.end(ptr, i1, token) #3 +declare void @exit(i32 noundef) +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #10 +declare void @dtor1() +declare void @dtor2() +declare void @dtor3() + +attributes #0 = { mustprogress nounwind uwtable } +attributes #1 = { nounwind presplitcoroutine uwtable coro_only_destroy_when_complete } +attributes #2 = { argmemonly nofree nounwind readonly } +attributes #3 = { nounwind } +attributes #4 = { nobuiltin allocsize(0) } +attributes #5 = { nofree nosync nounwind readnone } +attributes #6 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn } +attributes #7 = { nomerge nounwind } +attributes #8 = { nobuiltin nounwind } +attributes #9 = { noreturn } +attributes #10 = { argmemonly nounwind readonly } +attributes #11 = { nounwind allocsize(0) } +attributes #12 = { noduplicate } + +; CHECK: define{{.*}}@foo.destroy( +; CHECK-NEXT: entry.destroy: +; CHECK-NEXT: call void @_ZdlPv +; CHECK-NEXT: ret void + +; CHECK: define{{.*}}@foo.cleanup( +; CHECK-NEXT: entry.cleanup: +; CHECK-NEXT: ret void |