From d7ff0036463fbf049a240fe3792fcfcd8081c41e Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Thu, 14 Jan 2021 16:40:41 -0800 Subject: ADT: Fix reference invalidation in SmallVector::emplace_back and assign(N,V) This fixes the final (I think?) reference invalidation in `SmallVector` that we need to fix to align with `std::vector`. (There is still some left in the range insert / append / assign, but the standard calls that UB for `std::vector` so I think we don't care?) For POD-like types, reimplement `emplace_back()` in terms of `push_back()`, taking a copy even for large `T` rather than lose the realloc optimization in `grow_pod()`. For other types, split the grow operation in three and construct the new element in the middle. - `mallocForGrow()` calculates the new capacity and returns the result of `safe_malloc()`. We only need a single definition per `SmallVectorBase` so this is defined in SmallVector.cpp to avoid code size bloat. Moving this part of non-POD grow to the source file also allows the logic to be easily shared with `grow_pod`, and `report_size_overflow()` and `report_at_maximum_capacity()` can move there too. - `moveElementsForGrow()` moves elements from the old to the new allocation. - `takeAllocationForGrow()` frees the old allocation and saves the new allocation and capacity . `SmallVector:assign(size_type, const T&)` also uses the split-grow operations for non-POD, but it also has a semantic change when not growing. Previously, assign would start with `clear()`, and so the old elements were destructed and all elements of the new vector were copy-constructed (potentially invalidating references). The new implementation skips destruction and uses copy-assignment for the prefix of the new vector that fits. The new semantics match what libc++ does for `std::vector::assign()`. Note that the following is another possible implementation: ``` void assign(size_type NumElts, ValueParamT Elt) { std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); this->resize(NumElts, Elt); } ``` The downside of this simpler implementation is that if the vector has to grow there will be `size()` redundant copy operations. (I had planned on splitting this patch up into three for committing (after getting performance numbers / initial review), but I've realized that if this does for some reason need to be reverted we'll probably want to revert the whole package...) Differential Revision: https://reviews.llvm.org/D94739 --- llvm/lib/Support/SmallVector.cpp | 47 +++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'llvm/lib/Support/SmallVector.cpp') diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 7e3d01f..0005f78 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -45,12 +45,15 @@ static_assert(sizeof(SmallVector) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); -template -void SmallVectorBase::report_size_overflow(size_t MinSize) { +/// Report that MinSize doesn't fit into this vector's size type. Throws +/// std::length_error or calls report_fatal_error. +LLVM_ATTRIBUTE_NORETURN +static void report_size_overflow(size_t MinSize, size_t MaxSize); +static void report_size_overflow(size_t MinSize, size_t MaxSize) { std::string Reason = "SmallVector unable to grow. Requested capacity (" + std::to_string(MinSize) + ") is larger than maximum value for size type (" + - std::to_string(SizeTypeMax()) + ")"; + std::to_string(MaxSize) + ")"; #ifdef LLVM_ENABLE_EXCEPTIONS throw std::length_error(Reason); #else @@ -58,10 +61,13 @@ void SmallVectorBase::report_size_overflow(size_t MinSize) { #endif } -template void SmallVectorBase::report_at_maximum_capacity() { +/// Report that this vector is already at maximum capacity. Throws +/// std::length_error or calls report_fatal_error. +LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize); +static void report_at_maximum_capacity(size_t MaxSize) { std::string Reason = "SmallVector capacity unable to grow. Already at maximum size " + - std::to_string(SizeTypeMax()); + std::to_string(MaxSize); #ifdef LLVM_ENABLE_EXCEPTIONS throw std::length_error(Reason); #else @@ -71,25 +77,40 @@ template void SmallVectorBase::report_at_maximum_capacity // Note: Moving this function into the header may cause performance regression. template -void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSize, - size_t TSize) { +static size_t getNewCapacity(size_t MinSize, size_t TSize, size_t OldCapacity) { + constexpr size_t MaxSize = std::numeric_limits::max(); + // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > SizeTypeMax()) - report_size_overflow(MinSize); + if (MinSize > MaxSize) + report_size_overflow(MinSize, MaxSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. - if (capacity() == SizeTypeMax()) - report_at_maximum_capacity(); + if (OldCapacity == MaxSize) + report_at_maximum_capacity(MaxSize); // In theory 2*capacity can overflow if the capacity is 64 bit, but the // original capacity would never be large enough for this to be a problem. - size_t NewCapacity = 2 * capacity() + 1; // Always grow. - NewCapacity = std::min(std::max(NewCapacity, MinSize), SizeTypeMax()); + size_t NewCapacity = 2 * OldCapacity + 1; // Always grow. + return std::min(std::max(NewCapacity, MinSize), MaxSize); +} +// Note: Moving this function into the header may cause performance regression. +template +void *SmallVectorBase::mallocForGrow(size_t MinSize, size_t TSize, + size_t &NewCapacity) { + NewCapacity = getNewCapacity(MinSize, TSize, this->capacity()); + return llvm::safe_malloc(NewCapacity * TSize); +} + +// Note: Moving this function into the header may cause performance regression. +template +void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSize, + size_t TSize) { + size_t NewCapacity = getNewCapacity(MinSize, TSize, this->capacity()); void *NewElts; if (BeginX == FirstEl) { NewElts = safe_malloc(NewCapacity * TSize); -- cgit v1.1