diff options
Diffstat (limited to 'libgo/go/runtime/mpallocbits.go')
-rw-r--r-- | libgo/go/runtime/mpallocbits.go | 188 |
1 files changed, 114 insertions, 74 deletions
diff --git a/libgo/go/runtime/mpallocbits.go b/libgo/go/runtime/mpallocbits.go index a801134..ff11230 100644 --- a/libgo/go/runtime/mpallocbits.go +++ b/libgo/go/runtime/mpallocbits.go @@ -120,84 +120,105 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) { // sake of documentation, 0s are free pages and 1s are allocated pages. type pallocBits pageBits -// consec8tab is a table containing the number of consecutive -// zero bits for any uint8 value. -// -// The table is generated by calling consec8(i) for each -// possible uint8 value, which is defined as: -// -// // consec8 counts the maximum number of consecutive 0 bits -// // in a uint8. -// func consec8(n uint8) int { -// n = ^n -// i := 0 -// for n != 0 { -// n &= (n << 1) -// i++ -// } -// return i -// } -var consec8tab = [256]uint{ - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, - 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, - 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0, -} - // summarize returns a packed summary of the bitmap in pallocBits. func (b *pallocBits) summarize() pallocSum { - // TODO(mknyszek): There may be something more clever to be done - // here to make the summarize operation more efficient. For example, - // we can compute start and end with 64-bit wide operations easily, - // but max is a bit more complex. Perhaps there exists some way to - // leverage the 64-bit start and end to our advantage? - var start, max, end uint + var start, max, cur uint + const notSetYet = ^uint(0) // sentinel for start value + start = notSetYet for i := 0; i < len(b); i++ { - a := b[i] - for j := 0; j < 64; j += 8 { - k := uint8(a >> j) - - // Compute start. - si := uint(sys.TrailingZeros8(k)) - if start == uint(i*64+j) { - start += si - } + x := b[i] + if x == 0 { + cur += 64 + continue + } + t := uint(sys.TrailingZeros64(x)) + l := uint(sys.LeadingZeros64(x)) - // Compute max. - if end+si > max { - max = end + si - } - if mi := consec8tab[k]; mi > max { - max = mi + // Finish any region spanning the uint64s + cur += t + if start == notSetYet { + start = cur + } + if cur > max { + max = cur + } + // Final region that might span to next uint64 + cur = l + } + if start == notSetYet { + // Made it all the way through without finding a single 1 bit. + const n = uint(64 * len(b)) + return packPallocSum(n, n, n) + } + if cur > max { + max = cur + } + if max >= 64-2 { + // There is no way an internal run of zeros could beat max. + return packPallocSum(start, max, cur) + } + // Now look inside each uint64 for runs of zeros. + // All uint64s must be nonzero, or we would have aborted above. +outer: + for i := 0; i < len(b); i++ { + x := b[i] + + // Look inside this uint64. We have a pattern like + // 000000 1xxxxx1 000000 + // We need to look inside the 1xxxxx1 for any contiguous + // region of zeros. + + // We already know the trailing zeros are no larger than max. Remove them. + x >>= sys.TrailingZeros64(x) & 63 + if x&(x+1) == 0 { // no more zeros (except at the top). + continue + } + + // Strategy: shrink all runs of zeros by max. If any runs of zero + // remain, then we've identified a larger maxiumum zero run. + p := max // number of zeros we still need to shrink by. + k := uint(1) // current minimum length of runs of ones in x. + for { + // Shrink all runs of zeros by p places (except the top zeros). + for p > 0 { + if p <= k { + // Shift p ones down into the top of each run of zeros. + x |= x >> (p & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + break + } + // Shift k ones down into the top of each run of zeros. + x |= x >> (k & 63) + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer + } + p -= k + // We've just doubled the minimum length of 1-runs. + // This allows us to shift farther in the next iteration. + k *= 2 } - // Compute end. - if k == 0 { - end += 8 - } else { - end = uint(sys.LeadingZeros8(k)) + // The length of the lowest-order zero run is an increment to our maximum. + j := uint(sys.TrailingZeros64(^x)) // count contiguous trailing ones + x >>= j & 63 // remove trailing ones + j = uint(sys.TrailingZeros64(x)) // count contiguous trailing zeros + x >>= j & 63 // remove zeros + max += j // we have a new maximum! + if x&(x+1) == 0 { // no more zeros (except at the top). + continue outer } + p = j // remove j more zeros from each zero run. } } - return packPallocSum(start, max, end) + return packPallocSum(start, max, cur) } // find searches for npages contiguous free pages in pallocBits and returns // the index where that run starts, as well as the index of the first free page // it found in the search. searchIdx represents the first known free page and -// where to begin the search from. +// where to begin the next search from. // // If find fails to find any free space, it returns an index of ^uint(0) and // the new searchIdx should be ignored. @@ -218,9 +239,10 @@ func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) { // // See find for an explanation of the searchIdx parameter. func (b *pallocBits) find1(searchIdx uint) uint { + _ = b[0] // lift nil check out of loop for i := searchIdx / 64; i < uint(len(b)); i++ { x := b[i] - if x == ^uint64(0) { + if ^x == 0 { continue } return i*64 + uint(sys.TrailingZeros64(^x)) @@ -242,18 +264,18 @@ func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) { end, newSearchIdx := uint(0), ^uint(0) for i := searchIdx / 64; i < uint(len(b)); i++ { bi := b[i] - if bi == ^uint64(0) { + if ^bi == 0 { end = 0 continue } // First see if we can pack our allocation in the trailing // zeros plus the end of the last 64 bits. - start := uint(sys.TrailingZeros64(bi)) if newSearchIdx == ^uint(0) { // The new searchIdx is going to be at these 64 bits after any // 1s we file, so count trailing 1s. newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi)) } + start := uint(sys.TrailingZeros64(bi)) if end+start >= uint(npages) { return i*64 - end, newSearchIdx } @@ -348,15 +370,33 @@ func (b *pallocBits) pages64(i uint) uint64 { // findBitRange64 returns the bit index of the first set of // n consecutive 1 bits. If no consecutive set of 1 bits of // size n may be found in c, then it returns an integer >= 64. +// n must be > 0. func findBitRange64(c uint64, n uint) uint { - i := uint(0) - cont := uint(sys.TrailingZeros64(^c)) - for cont < n && i < 64 { - i += cont - i += uint(sys.TrailingZeros64(c >> i)) - cont = uint(sys.TrailingZeros64(^(c >> i))) + // This implementation is based on shrinking the length of + // runs of contiguous 1 bits. We remove the top n-1 1 bits + // from each run of 1s, then look for the first remaining 1 bit. + p := n - 1 // number of 1s we want to remove. + k := uint(1) // current minimum width of runs of 0 in c. + for p > 0 { + if p <= k { + // Shift p 0s down into the top of each run of 1s. + c &= c >> (p & 63) + break + } + // Shift k 0s down into the top of each run of 1s. + c &= c >> (k & 63) + if c == 0 { + return 64 + } + p -= k + // We've just doubled the minimum length of 0-runs. + // This allows us to shift farther in the next iteration. + k *= 2 } - return i + // Find first remaining 1. + // Since we shrunk from the top down, the first 1 is in + // its correct original position. + return uint(sys.TrailingZeros64(c)) } // pallocData encapsulates pallocBits and a bitmap for |