diff options
Diffstat (limited to 'libgo/go/runtime/memmove_test.go')
-rw-r--r-- | libgo/go/runtime/memmove_test.go | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 396c130..7c9d2ad 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -286,6 +286,9 @@ var bufSizes = []int{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, } +var bufSizesOverlap = []int{ + 32, 64, 128, 256, 512, 1024, 2048, 4096, +} func BenchmarkMemmove(b *testing.B) { benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { @@ -297,6 +300,15 @@ func BenchmarkMemmove(b *testing.B) { }) } +func BenchmarkMemmoveOverlap(b *testing.B) { + benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { + x := make([]byte, n+16) + for i := 0; i < b.N; i++ { + copy(x[16:n+16], x[:n]) + } + }) +} + func BenchmarkMemmoveUnalignedDst(b *testing.B) { benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { x := make([]byte, n+1) @@ -307,6 +319,15 @@ func BenchmarkMemmoveUnalignedDst(b *testing.B) { }) } +func BenchmarkMemmoveUnalignedDstOverlap(b *testing.B) { + benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { + x := make([]byte, n+16) + for i := 0; i < b.N; i++ { + copy(x[16:n+16], x[1:n+1]) + } + }) +} + func BenchmarkMemmoveUnalignedSrc(b *testing.B) { benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { x := make([]byte, n) @@ -317,6 +338,15 @@ func BenchmarkMemmoveUnalignedSrc(b *testing.B) { }) } +func BenchmarkMemmoveUnalignedSrcOverlap(b *testing.B) { + benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { + x := make([]byte, n+1) + for i := 0; i < b.N; i++ { + copy(x[1:n+1], x[:n]) + } + }) +} + func TestMemclr(t *testing.T) { size := 512 if testing.Short() { @@ -538,21 +568,30 @@ func BenchmarkCopyFat1024(b *testing.B) { } } +// BenchmarkIssue18740 ensures that memmove uses 4 and 8 byte load/store to move 4 and 8 bytes. +// It used to do 2 2-byte load/stores, which leads to a pipeline stall +// when we try to read the result with one 4-byte load. func BenchmarkIssue18740(b *testing.B) { - // This tests that memmove uses one 4-byte load/store to move 4 bytes. - // It used to do 2 2-byte load/stores, which leads to a pipeline stall - // when we try to read the result with one 4-byte load. - var buf [4]byte - for j := 0; j < b.N; j++ { - s := uint32(0) - for i := 0; i < 4096; i += 4 { - copy(buf[:], g[i:]) - s += binary.LittleEndian.Uint32(buf[:]) - } - sink = uint64(s) + benchmarks := []struct { + name string + nbyte int + f func([]byte) uint64 + }{ + {"2byte", 2, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint16(buf)) }}, + {"4byte", 4, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint32(buf)) }}, + {"8byte", 8, func(buf []byte) uint64 { return binary.LittleEndian.Uint64(buf) }}, + } + + var g [4096]byte + for _, bm := range benchmarks { + buf := make([]byte, bm.nbyte) + b.Run(bm.name, func(b *testing.B) { + for j := 0; j < b.N; j++ { + for i := 0; i < 4096; i += bm.nbyte { + copy(buf[:], g[i:]) + sink += bm.f(buf[:]) + } + } + }) } } - -// TODO: 2 byte and 8 byte benchmarks also. - -var g [4096]byte |