diff options
Diffstat (limited to 'compiler-rt')
-rw-r--r-- | compiler-rt/lib/scudo/standalone/allocator_common.h | 7 | ||||
-rw-r--r-- | compiler-rt/lib/scudo/standalone/primary32.h | 106 | ||||
-rw-r--r-- | compiler-rt/lib/scudo/standalone/primary64.h | 153 | ||||
-rw-r--r-- | compiler-rt/lib/scudo/standalone/tests/primary_test.cpp | 34 |
4 files changed, 159 insertions, 141 deletions
diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h index 95f4776..2b77516 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_common.h +++ b/compiler-rt/lib/scudo/standalone/allocator_common.h @@ -40,6 +40,7 @@ template <class SizeClassAllocator> struct TransferBatch { B->Count = static_cast<u16>(B->Count - N); } void clear() { Count = 0; } + bool empty() { return Count == 0; } void add(CompactPtrT P) { DCHECK_LT(Count, MaxNumCached); Batch[Count++] = P; @@ -48,6 +49,12 @@ template <class SizeClassAllocator> struct TransferBatch { memcpy(Array, Batch, sizeof(Batch[0]) * Count); clear(); } + + void moveNToArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, Count); + memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N); + Count = static_cast<u16>(Count - N); + } u16 getCount() const { return Count; } bool isEmpty() const { return Count == 0U; } CompactPtrT get(u16 I) const { diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 4d03b28..c86e75b 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -191,38 +191,21 @@ public: return BlockSize > PageSize; } - // Note that the `MaxBlockCount` will be used when we support arbitrary blocks - // count. Now it's the same as the number of blocks stored in the - // `TransferBatch`. u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, - UNUSED const u16 MaxBlockCount) { - TransferBatchT *B = popBatch(C, ClassId); - if (!B) - return 0; - - const u16 Count = B->getCount(); - DCHECK_GT(Count, 0U); - B->moveToArray(ToArray); - - if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, B); - - return Count; - } - - TransferBatchT *popBatch(CacheT *C, uptr ClassId) { + const u16 MaxBlockCount) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); ScopedLock L(Sci->Mutex); - TransferBatchT *B = popBatchImpl(C, ClassId, Sci); - if (UNLIKELY(!B)) { + + u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount); + if (UNLIKELY(PopCount == 0)) { if (UNLIKELY(!populateFreeList(C, ClassId, Sci))) - return nullptr; - B = popBatchImpl(C, ClassId, Sci); - // if `populateFreeList` succeeded, we are supposed to get free blocks. - DCHECK_NE(B, nullptr); + return 0U; + PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); } - return B; + + return PopCount; } // Push the array of free blocks to the designated batch group. @@ -510,7 +493,7 @@ private: // by TransferBatch is also free for use. We don't need to recycle the // TransferBatch. Note that the correctness is maintained by the invariant, // - // The unit of each popBatch() request is entire TransferBatch. Return + // Each popBlocks() request returns the entire TransferBatch. Returning // part of the blocks in a TransferBatch is invalid. // // This ensures that TransferBatch won't leak the address itself while it's @@ -634,7 +617,7 @@ private: BG->Batches.push_front(TB); BG->PushedBlocks = 0; BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId)); + BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached; return BG; }; @@ -726,14 +709,11 @@ private: InsertBlocks(Cur, Array + Size - Count, Count); } - // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest - // group id will be considered first. - // - // The region mutex needs to be held while calling this method. - TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Sci->Mutex) { if (Sci->FreeListInfo.BlockList.empty()) - return nullptr; + return 0U; SinglyLinkedList<TransferBatchT> &Batches = Sci->FreeListInfo.BlockList.front()->Batches; @@ -746,33 +726,57 @@ private: // Block used by `BatchGroup` is from BatchClassId. Turn the block into // `TransferBatch` with single block. TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG); - TB->clear(); - TB->add( - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB))); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)); Sci->FreeListInfo.PoppedBlocks += 1; - return TB; + return 1U; } + // So far, instead of always filling the blocks to `MaxBlockCount`, we only + // examine single `TransferBatch` to minimize the time spent on the primary + // allocator. Besides, the sizes of `TransferBatch` and + // `CacheT::getMaxCached()` may also impact the time spent on accessing the + // primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `TransferBatch` according to + // `CacheT::getMaxCached()`. TransferBatchT *B = Batches.front(); - Batches.pop_front(); DCHECK_NE(B, nullptr); DCHECK_GT(B->getCount(), 0U); - if (Batches.empty()) { - BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); - Sci->FreeListInfo.BlockList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking while - // allocating. Note that block used by constructing BatchGroup is recorded - // as free blocks in the last element of BatchGroup::Batches. Which means, - // once we pop the last TransferBatch, the block is implicitly - // deallocated. + // BachClassId should always take all blocks in the TransferBatch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `Mutex`. + if (B->empty()) { + Batches.pop_front(); + // `TransferBatch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, BG); + C->deallocate(SizeClassMap::BatchClassId, B); + + if (Batches.empty()) { + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last TransferBatch, the block is + // implicitly deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } } - Sci->FreeListInfo.PoppedBlocks += B->getCount(); - return B; + Sci->FreeListInfo.PoppedBlocks += PopCount; + return PopCount; } NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 9a642d2..d89a2e6 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -12,6 +12,7 @@ #include "allocator_common.h" #include "bytemap.h" #include "common.h" +#include "condition_variable.h" #include "list.h" #include "local_cache.h" #include "mem_map.h" @@ -22,8 +23,6 @@ #include "string_utils.h" #include "thread_annotations.h" -#include "condition_variable.h" - namespace scudo { // SizeClassAllocator64 is an allocator tuned for 64-bit address space. @@ -221,41 +220,24 @@ public: DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); } - // Note that the `MaxBlockCount` will be used when we support arbitrary blocks - // count. Now it's the same as the number of blocks stored in the - // `TransferBatch`. u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, - UNUSED const u16 MaxBlockCount) { - TransferBatchT *B = popBatch(C, ClassId); - if (!B) - return 0; - - const u16 Count = B->getCount(); - DCHECK_GT(Count, 0U); - B->moveToArray(ToArray); - - if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, B); - - return Count; - } - - TransferBatchT *popBatch(CacheT *C, uptr ClassId) { + const u16 MaxBlockCount) { DCHECK_LT(ClassId, NumClasses); RegionInfo *Region = getRegionInfo(ClassId); + u16 PopCount = 0; { ScopedLock L(Region->FLLock); - TransferBatchT *B = popBatchImpl(C, ClassId, Region); - if (LIKELY(B)) - return B; + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + return PopCount; } bool ReportRegionExhausted = false; - TransferBatchT *B = nullptr; if (conditionVariableEnabled()) { - B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted); + PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount, + ReportRegionExhausted); } else { while (true) { // When two threads compete for `Region->MMLock`, we only want one of @@ -264,13 +246,15 @@ public: ScopedLock ML(Region->MMLock); { ScopedLock FL(Region->FLLock); - if ((B = popBatchImpl(C, ClassId, Region))) - break; + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) + return PopCount; } const bool RegionIsExhausted = Region->Exhausted; if (!RegionIsExhausted) - B = populateFreeListAndPopBatch(C, ClassId, Region); + PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, + MaxBlockCount); ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; break; } @@ -286,7 +270,7 @@ public: reportOutOfBatchClass(); } - return B; + return PopCount; } // Push the array of free blocks to the designated batch group. @@ -640,7 +624,7 @@ private: // by TransferBatch is also free for use. We don't need to recycle the // TransferBatch. Note that the correctness is maintained by the invariant, // - // The unit of each popBatch() request is entire TransferBatch. Return + // Each popBlocks() request returns the entire TransferBatch. Returning // part of the blocks in a TransferBatch is invalid. // // This ensures that TransferBatch won't leak the address itself while it's @@ -763,7 +747,7 @@ private: BG->Batches.push_front(TB); BG->PushedBlocks = 0; BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId)); + BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached; return BG; }; @@ -855,9 +839,10 @@ private: InsertBlocks(Cur, Array + Size - Count, Count); } - TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region, - bool &ReportRegionExhausted) { - TransferBatchT *B = nullptr; + u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount, + bool &ReportRegionExhausted) { + u16 PopCount = 0; while (true) { // We only expect one thread doing the freelist refillment and other @@ -878,7 +863,8 @@ private: const bool RegionIsExhausted = Region->Exhausted; if (!RegionIsExhausted) - B = populateFreeListAndPopBatch(C, ClassId, Region); + PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, + MaxBlockCount); ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; { @@ -905,7 +891,8 @@ private: // blocks were used up right after the refillment. Therefore, we have to // check if someone is still populating the freelist. ScopedLock FL(Region->FLLock); - if (LIKELY(B = popBatchImpl(C, ClassId, Region))) + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) break; if (!Region->isPopulatingFreeList) @@ -918,21 +905,19 @@ private: // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`. Region->FLLockCV.wait(Region->FLLock); - if (LIKELY(B = popBatchImpl(C, ClassId, Region))) + PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + if (PopCount != 0U) break; } - return B; + return PopCount; } - // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest - // group id will be considered first. - // - // The region mutex needs to be held while calling this method. - TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region) + u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->FLLock) { if (Region->FreeListInfo.BlockList.empty()) - return nullptr; + return 0U; SinglyLinkedList<TransferBatchT> &Batches = Region->FreeListInfo.BlockList.front()->Batches; @@ -945,39 +930,64 @@ private: // Block used by `BatchGroup` is from BatchClassId. Turn the block into // `TransferBatch` with single block. TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG); - TB->clear(); - TB->add( - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB))); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)); Region->FreeListInfo.PoppedBlocks += 1; - return TB; + return 1U; } + // So far, instead of always filling blocks to `MaxBlockCount`, we only + // examine single `TransferBatch` to minimize the time spent in the primary + // allocator. Besides, the sizes of `TransferBatch` and + // `CacheT::getMaxCached()` may also impact the time spent on accessing the + // primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `TransferBatch` according to + // `CacheT::getMaxCached()`. TransferBatchT *B = Batches.front(); - Batches.pop_front(); DCHECK_NE(B, nullptr); DCHECK_GT(B->getCount(), 0U); - if (Batches.empty()) { - BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); - Region->FreeListInfo.BlockList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking while - // allocating. Note that block used by constructing BatchGroup is recorded - // as free blocks in the last element of BatchGroup::Batches. Which means, - // once we pop the last TransferBatch, the block is implicitly - // deallocated. + // BachClassId should always take all blocks in the TransferBatch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `FLLock`. + if (B->empty()) { + Batches.pop_front(); + // `TransferBatch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. if (ClassId != SizeClassMap::BatchClassId) - C->deallocate(SizeClassMap::BatchClassId, BG); + C->deallocate(SizeClassMap::BatchClassId, B); + + if (Batches.empty()) { + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last TransferBatch, the block is + // implicitly deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } } - Region->FreeListInfo.PoppedBlocks += B->getCount(); + Region->FreeListInfo.PoppedBlocks += PopCount; - return B; + return PopCount; } - // Refill the freelist and return one batch. - NOINLINE TransferBatchT *populateFreeListAndPopBatch(CacheT *C, uptr ClassId, - RegionInfo *Region) + NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId, + RegionInfo *Region, + CompactPtrT *ToArray, + const u16 MaxBlockCount) REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { const uptr Size = getSizeByClassId(ClassId); const u16 MaxCount = CacheT::getMaxCached(Size); @@ -994,7 +1004,7 @@ private: const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { Region->Exhausted = true; - return nullptr; + return 0U; } if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( @@ -1002,7 +1012,7 @@ private: MAP_ALLOWNOMEM | MAP_RESIZABLE | (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0)))) { - return nullptr; + return 0U; } Region->MemMapInfo.MappedUser += MapSize; C->getStats().add(StatMapped, MapSize); @@ -1049,8 +1059,9 @@ private: pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); } - TransferBatchT *B = popBatchImpl(C, ClassId, Region); - DCHECK_NE(B, nullptr); + const u16 PopCount = + popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record // the requests from `PushBlocks` and `PopBatch` which are external @@ -1062,7 +1073,7 @@ private: C->getStats().add(StatFree, AllocatedUser); Region->MemMapInfo.AllocatedUser += AllocatedUser; - return B; + return PopCount; } void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) @@ -1186,7 +1197,7 @@ private: } // Note that we have extracted the `GroupsToRelease` from region freelist. - // It's safe to let pushBlocks()/popBatches() access the remaining region + // It's safe to let pushBlocks()/popBlocks() access the remaining region // freelist. In the steps 3 and 4, we will temporarily release the FLLock // and lock it again before step 5. diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 1817151..f64a514 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -237,7 +237,6 @@ struct SmallRegionsConfig { // For the 32-bit one, it requires actually exhausting memory, so we skip it. TEST(ScudoPrimaryTest, Primary64OOM) { using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>; - using TransferBatch = Primary::TransferBatchT; Primary Allocator; Allocator.init(/*ReleaseToOsInterval=*/-1); typename Primary::CacheT Cache; @@ -245,29 +244,26 @@ TEST(ScudoPrimaryTest, Primary64OOM) { Stats.init(); Cache.init(&Stats, &Allocator); bool AllocationFailed = false; - std::vector<TransferBatch *> Batches; + std::vector<void *> Blocks; const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId; const scudo::uptr Size = Primary::getSizeByClassId(ClassId); - typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached]; + const scudo::u16 MaxCachedBlockCount = Primary::CacheT::getMaxCached(Size); for (scudo::uptr I = 0; I < 10000U; I++) { - TransferBatch *B = Allocator.popBatch(&Cache, ClassId); - if (!B) { - AllocationFailed = true; - break; + for (scudo::uptr J = 0; J < MaxCachedBlockCount; ++J) { + void *Ptr = Cache.allocate(ClassId); + if (Ptr == nullptr) { + AllocationFailed = true; + break; + } + memset(Ptr, 'B', Size); + Blocks.push_back(Ptr); } - for (scudo::u16 J = 0; J < B->getCount(); J++) - memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size); - Batches.push_back(B); - } - while (!Batches.empty()) { - TransferBatch *B = Batches.back(); - Batches.pop_back(); - const scudo::u16 Count = B->getCount(); - B->moveToArray(Blocks); - Allocator.pushBlocks(&Cache, ClassId, Blocks, Count); - Cache.deallocate(Primary::SizeClassMap::BatchClassId, B); } + + for (auto *Ptr : Blocks) + Cache.deallocate(ClassId, Ptr); + Cache.destroy(nullptr); Allocator.releaseToOS(scudo::ReleaseToOS::Force); scudo::ScopedString Str; @@ -342,7 +338,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) { V.push_back(std::make_pair(ClassId, P)); } - // Try to interleave pushBlocks(), popBatch() and releaseToOS(). + // Try to interleave pushBlocks(), popBlocks() and releaseToOS(). Allocator->releaseToOS(scudo::ReleaseToOS::Force); while (!V.empty()) { |