1 files changed, 114 insertions, 74 deletions
diff --git a/libgo/go/runtime/mpallocbits.go b/libgo/go/runtime/mpallocbits.go
index a801134..ff11230 100644
--- a/libgo/go/runtime/mpallocbits.go
+++ b/libgo/go/runtime/mpallocbits.go
@@ -120,84 +120,105 @@ func (b *pageBits) popcntRange(i, n uint) (s uint) {
 // sake of documentation, 0s are free pages and 1s are allocated pages.
 type pallocBits pageBits
 
-// consec8tab is a table containing the number of consecutive
-// zero bits for any uint8 value.
-//
-// The table is generated by calling consec8(i) for each
-// possible uint8 value, which is defined as:
-//
-// // consec8 counts the maximum number of consecutive 0 bits
-// // in a uint8.
-// func consec8(n uint8) int {
-// 	n = ^n
-// 	i := 0
-// 	for n != 0 {
-// 		n &= (n << 1)
-// 		i++
-// 	}
-// 	return i
-// }
-var consec8tab = [256]uint{
-	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
-	4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
-	5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
-	4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0,
-}
-
 // summarize returns a packed summary of the bitmap in pallocBits.
 func (b *pallocBits) summarize() pallocSum {
-	// TODO(mknyszek): There may be something more clever to be done
-	// here to make the summarize operation more efficient. For example,
-	// we can compute start and end with 64-bit wide operations easily,
-	// but max is a bit more complex. Perhaps there exists some way to
-	// leverage the 64-bit start and end to our advantage?
-	var start, max, end uint
+	var start, max, cur uint
+	const notSetYet = ^uint(0) // sentinel for start value
+	start = notSetYet
 	for i := 0; i < len(b); i++ {
-		a := b[i]
-		for j := 0; j < 64; j += 8 {
-			k := uint8(a >> j)
-
-			// Compute start.
-			si := uint(sys.TrailingZeros8(k))
-			if start == uint(i*64+j) {
-				start += si
-			}
+		x := b[i]
+		if x == 0 {
+			cur += 64
+			continue
+		}
+		t := uint(sys.TrailingZeros64(x))
+		l := uint(sys.LeadingZeros64(x))
 
-			// Compute max.
-			if end+si > max {
-				max = end + si
-			}
-			if mi := consec8tab[k]; mi > max {
-				max = mi
+		// Finish any region spanning the uint64s
+		cur += t
+		if start == notSetYet {
+			start = cur
+		}
+		if cur > max {
+			max = cur
+		}
+		// Final region that might span to next uint64
+		cur = l
+	}
+	if start == notSetYet {
+		// Made it all the way through without finding a single 1 bit.
+		const n = uint(64 * len(b))
+		return packPallocSum(n, n, n)
+	}
+	if cur > max {
+		max = cur
+	}
+	if max >= 64-2 {
+		// There is no way an internal run of zeros could beat max.
+		return packPallocSum(start, max, cur)
+	}
+	// Now look inside each uint64 for runs of zeros.
+	// All uint64s must be nonzero, or we would have aborted above.
+outer:
+	for i := 0; i < len(b); i++ {
+		x := b[i]
+
+		// Look inside this uint64. We have a pattern like
+		// 000000 1xxxxx1 000000
+		// We need to look inside the 1xxxxx1 for any contiguous
+		// region of zeros.
+
+		// We already know the trailing zeros are no larger than max. Remove them.
+		x >>= sys.TrailingZeros64(x) & 63
+		if x&(x+1) == 0 { // no more zeros (except at the top).
+			continue
+		}
+
+		// Strategy: shrink all runs of zeros by max. If any runs of zero
+		// remain, then we've identified a larger maxiumum zero run.
+		p := max     // number of zeros we still need to shrink by.
+		k := uint(1) // current minimum length of runs of ones in x.
+		for {
+			// Shrink all runs of zeros by p places (except the top zeros).
+			for p > 0 {
+				if p <= k {
+					// Shift p ones down into the top of each run of zeros.
+					x |= x >> (p & 63)
+					if x&(x+1) == 0 { // no more zeros (except at the top).
+						continue outer
+					}
+					break
+				}
+				// Shift k ones down into the top of each run of zeros.
+				x |= x >> (k & 63)
+				if x&(x+1) == 0 { // no more zeros (except at the top).
+					continue outer
+				}
+				p -= k
+				// We've just doubled the minimum length of 1-runs.
+				// This allows us to shift farther in the next iteration.
+				k *= 2
 			}
 
-			// Compute end.
-			if k == 0 {
-				end += 8
-			} else {
-				end = uint(sys.LeadingZeros8(k))
+			// The length of the lowest-order zero run is an increment to our maximum.
+			j := uint(sys.TrailingZeros64(^x)) // count contiguous trailing ones
+			x >>= j & 63                       // remove trailing ones
+			j = uint(sys.TrailingZeros64(x))   // count contiguous trailing zeros
+			x >>= j & 63                       // remove zeros
+			max += j                           // we have a new maximum!
+			if x&(x+1) == 0 {                  // no more zeros (except at the top).
+				continue outer
 			}
+			p = j // remove j more zeros from each zero run.
 		}
 	}
-	return packPallocSum(start, max, end)
+	return packPallocSum(start, max, cur)
 }
 
 // find searches for npages contiguous free pages in pallocBits and returns
 // the index where that run starts, as well as the index of the first free page
 // it found in the search. searchIdx represents the first known free page and
-// where to begin the search from.
+// where to begin the next search from.
 //
 // If find fails to find any free space, it returns an index of ^uint(0) and
 // the new searchIdx should be ignored.
@@ -218,9 +239,10 @@ func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) {
 //
 // See find for an explanation of the searchIdx parameter.
 func (b *pallocBits) find1(searchIdx uint) uint {
+	_ = b[0] // lift nil check out of loop
 	for i := searchIdx / 64; i < uint(len(b)); i++ {
 		x := b[i]
-		if x == ^uint64(0) {
+		if ^x == 0 {
 			continue
 		}
 		return i*64 + uint(sys.TrailingZeros64(^x))
@@ -242,18 +264,18 @@ func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) {
 	end, newSearchIdx := uint(0), ^uint(0)
 	for i := searchIdx / 64; i < uint(len(b)); i++ {
 		bi := b[i]
-		if bi == ^uint64(0) {
+		if ^bi == 0 {
 			end = 0
 			continue
 		}
 		// First see if we can pack our allocation in the trailing
 		// zeros plus the end of the last 64 bits.
-		start := uint(sys.TrailingZeros64(bi))
 		if newSearchIdx == ^uint(0) {
 			// The new searchIdx is going to be at these 64 bits after any
 			// 1s we file, so count trailing 1s.
 			newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi))
 		}
+		start := uint(sys.TrailingZeros64(bi))
 		if end+start >= uint(npages) {
 			return i*64 - end, newSearchIdx
 		}
@@ -348,15 +370,33 @@ func (b *pallocBits) pages64(i uint) uint64 {
 // findBitRange64 returns the bit index of the first set of
 // n consecutive 1 bits. If no consecutive set of 1 bits of
 // size n may be found in c, then it returns an integer >= 64.
+// n must be > 0.
 func findBitRange64(c uint64, n uint) uint {
-	i := uint(0)
-	cont := uint(sys.TrailingZeros64(^c))
-	for cont < n && i < 64 {
-		i += cont
-		i += uint(sys.TrailingZeros64(c >> i))
-		cont = uint(sys.TrailingZeros64(^(c >> i)))
+	// This implementation is based on shrinking the length of
+	// runs of contiguous 1 bits. We remove the top n-1 1 bits
+	// from each run of 1s, then look for the first remaining 1 bit.
+	p := n - 1   // number of 1s we want to remove.
+	k := uint(1) // current minimum width of runs of 0 in c.
+	for p > 0 {
+		if p <= k {
+			// Shift p 0s down into the top of each run of 1s.
+			c &= c >> (p & 63)
+			break
+		}
+		// Shift k 0s down into the top of each run of 1s.
+		c &= c >> (k & 63)
+		if c == 0 {
+			return 64
+		}
+		p -= k
+		// We've just doubled the minimum length of 0-runs.
+		// This allows us to shift farther in the next iteration.
+		k *= 2
 	}
-	return i
+	// Find first remaining 1.
+	// Since we shrunk from the top down, the first 1 is in
+	// its correct original position.
+	return uint(sys.TrailingZeros64(c))
 }
 
 // pallocData encapsulates pallocBits and a bitmap for