diff options
author | Ian Lance Taylor <iant@golang.org> | 2021-07-30 14:28:58 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2021-08-12 20:23:07 -0700 |
commit | c5b21c3f4c17b0649155035d2f9aa97b2da8a813 (patch) | |
tree | c6d3a68b503ba5b16182acbb958e3e5dbc95a43b /libgo/go/math | |
parent | 72be20e20299ec57b4bc9ba03d5b7d6bf10e97cc (diff) | |
download | gcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.zip gcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.tar.gz gcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.tar.bz2 |
libgo: update to Go1.17rc2
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/341629
Diffstat (limited to 'libgo/go/math')
51 files changed, 1571 insertions, 504 deletions
diff --git a/libgo/go/math/all_test.go b/libgo/go/math/all_test.go index 3aae037..55c805e 100644 --- a/libgo/go/math/all_test.go +++ b/libgo/go/math/all_test.go @@ -2067,6 +2067,21 @@ var fmaC = []struct{ x, y, z, want float64 }{ {-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308}, } +var sqrt32 = []float32{ + 0, + float32(Copysign(0, -1)), + float32(NaN()), + float32(Inf(1)), + float32(Inf(-1)), + 1, + 2, + -2, + 4.9790119248836735e+00, + 7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, +} + func tolerance(a, b, e float64) bool { // Multiplying by e here can underflow denormal values to zero. // Check a==b so that at least if a and b are small and identical @@ -3181,6 +3196,34 @@ func TestFloatMinMax(t *testing.T) { } } +func TestFloatMinima(t *testing.T) { + if q := float32(SmallestNonzeroFloat32 / 2); q != 0 { + t.Errorf("float32(SmallestNonzeroFloat32 / 2) = %g, want 0", q) + } + if q := float64(SmallestNonzeroFloat64 / 2); q != 0 { + t.Errorf("float64(SmallestNonzeroFloat64 / 2) = %g, want 0", q) + } +} + +var indirectSqrt = Sqrt + +// TestFloat32Sqrt checks the correctness of the float32 square root optimization result. +func TestFloat32Sqrt(t *testing.T) { + for _, v := range sqrt32 { + want := float32(indirectSqrt(float64(v))) + got := float32(Sqrt(float64(v))) + if IsNaN(float64(want)) { + if !IsNaN(float64(got)) { + t.Errorf("got=%#v want=NaN, v=%#v", got, v) + } + continue + } + if got != want { + t.Errorf("got=%#v want=%#v, v=%#v", got, want, v) + } + } +} + // Benchmarks // Global exported variables are used to store the diff --git a/libgo/go/math/arith_s390x.go b/libgo/go/math/arith_s390x.go index c28651e..6352734 100644 --- a/libgo/go/math/arith_s390x.go +++ b/libgo/go/math/arith_s390x.go @@ -2,78 +2,172 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore package math import "internal/cpu" +func expTrampolineSetup(x float64) float64 +func expAsm(x float64) float64 + +func logTrampolineSetup(x float64) float64 +func logAsm(x float64) float64 + +// Below here all functions are grouped in stubs.go for other +// architectures. + +const haveArchLog10 = true + +func archLog10(x float64) float64 func log10TrampolineSetup(x float64) float64 func log10Asm(x float64) float64 +const haveArchCos = true + +func archCos(x float64) float64 func cosTrampolineSetup(x float64) float64 func cosAsm(x float64) float64 +const haveArchCosh = true + +func archCosh(x float64) float64 func coshTrampolineSetup(x float64) float64 func coshAsm(x float64) float64 +const haveArchSin = true + +func archSin(x float64) float64 func sinTrampolineSetup(x float64) float64 func sinAsm(x float64) float64 +const haveArchSinh = true + +func archSinh(x float64) float64 func sinhTrampolineSetup(x float64) float64 func sinhAsm(x float64) float64 +const haveArchTanh = true + +func archTanh(x float64) float64 func tanhTrampolineSetup(x float64) float64 func tanhAsm(x float64) float64 +const haveArchLog1p = true + +func archLog1p(x float64) float64 func log1pTrampolineSetup(x float64) float64 func log1pAsm(x float64) float64 +const haveArchAtanh = true + +func archAtanh(x float64) float64 func atanhTrampolineSetup(x float64) float64 func atanhAsm(x float64) float64 +const haveArchAcos = true + +func archAcos(x float64) float64 func acosTrampolineSetup(x float64) float64 func acosAsm(x float64) float64 +const haveArchAcosh = true + +func archAcosh(x float64) float64 func acoshTrampolineSetup(x float64) float64 func acoshAsm(x float64) float64 +const haveArchAsin = true + +func archAsin(x float64) float64 func asinTrampolineSetup(x float64) float64 func asinAsm(x float64) float64 +const haveArchAsinh = true + +func archAsinh(x float64) float64 func asinhTrampolineSetup(x float64) float64 func asinhAsm(x float64) float64 +const haveArchErf = true + +func archErf(x float64) float64 func erfTrampolineSetup(x float64) float64 func erfAsm(x float64) float64 +const haveArchErfc = true + +func archErfc(x float64) float64 func erfcTrampolineSetup(x float64) float64 func erfcAsm(x float64) float64 +const haveArchAtan = true + +func archAtan(x float64) float64 func atanTrampolineSetup(x float64) float64 func atanAsm(x float64) float64 +const haveArchAtan2 = true + +func archAtan2(y, x float64) float64 func atan2TrampolineSetup(x, y float64) float64 func atan2Asm(x, y float64) float64 +const haveArchCbrt = true + +func archCbrt(x float64) float64 func cbrtTrampolineSetup(x float64) float64 func cbrtAsm(x float64) float64 -func logTrampolineSetup(x float64) float64 -func logAsm(x float64) float64 +const haveArchTan = true +func archTan(x float64) float64 func tanTrampolineSetup(x float64) float64 func tanAsm(x float64) float64 -func expTrampolineSetup(x float64) float64 -func expAsm(x float64) float64 +const haveArchExpm1 = true +func archExpm1(x float64) float64 func expm1TrampolineSetup(x float64) float64 func expm1Asm(x float64) float64 +const haveArchPow = true + +func archPow(x, y float64) float64 func powTrampolineSetup(x, y float64) float64 func powAsm(x, y float64) float64 +const haveArchFrexp = false + +func archFrexp(x float64) (float64, int) { + panic("not implemented") +} + +const haveArchLdexp = false + +func archLdexp(frac float64, exp int) float64 { + panic("not implemented") +} + +const haveArchLog2 = false + +func archLog2(x float64) float64 { + panic("not implemented") +} + +const haveArchMod = false + +func archMod(x, y float64) float64 { + panic("not implemented") +} + +const haveArchRemainder = false + +func archRemainder(x, y float64) float64 { + panic("not implemented") +} + // hasVX reports whether the machine has the z/Architecture // vector facility installed and enabled. var hasVX = cpu.S390X.HasVX diff --git a/libgo/go/math/asin.go b/libgo/go/math/asin.go index 46a5fe9..ac70bcb 100644 --- a/libgo/go/math/asin.go +++ b/libgo/go/math/asin.go @@ -16,14 +16,13 @@ package math // Special cases are: // Asin(±0) = ±0 // Asin(x) = NaN if x < -1 or x > 1 - -//extern asin -func libc_asin(float64) float64 - func Asin(x float64) float64 { return libc_asin(x) } +//extern asin +func libc_asin(float64) float64 + func asin(x float64) float64 { if x == 0 { return x // special case @@ -54,14 +53,13 @@ func asin(x float64) float64 { // // Special case is: // Acos(x) = NaN if x < -1 or x > 1 - -//extern acos -func libc_acos(float64) float64 - func Acos(x float64) float64 { return libc_acos(x) } +//extern acos +func libc_acos(float64) float64 + func acos(x float64) float64 { return Pi/2 - Asin(x) } diff --git a/libgo/go/math/atan.go b/libgo/go/math/atan.go index 4c9eda4..e4c997e 100644 --- a/libgo/go/math/atan.go +++ b/libgo/go/math/atan.go @@ -90,12 +90,8 @@ func satan(x float64) float64 { // Atan returns the arctangent, in radians, of x. // // Special cases are: -// Atan(±0) = ±0 -// Atan(±Inf) = ±Pi/2 - -//extern atan -func libc_atan(float64) float64 - +// Atan(±0) = ±0 +// Atan(±Inf) = ±Pi/2 func Atan(x float64) float64 { if x == 0 { return x @@ -103,6 +99,9 @@ func Atan(x float64) float64 { return libc_atan(x) } +//extern atan +func libc_atan(float64) float64 + func atan(x float64) float64 { if x == 0 { return x diff --git a/libgo/go/math/big/arith.go b/libgo/go/math/big/arith.go index 750ce8a..8f55c19 100644 --- a/libgo/go/math/big/arith.go +++ b/libgo/go/math/big/arith.go @@ -170,12 +170,16 @@ func shrVU_g(z, x []Word, s uint) (c Word) { if len(z) == 0 { return } + if len(x) != len(z) { + // This is an invariant guaranteed by the caller. + panic("len(x) != len(z)") + } s &= _W - 1 // hint to the compiler that shifts by s don't need guard code ŝ := _W - s ŝ &= _W - 1 // ditto c = x[0] << ŝ - for i := 0; i < len(z)-1; i++ { - z[i] = x[i]>>s | x[i+1]<<ŝ + for i := 1; i < len(z); i++ { + z[i-1] = x[i-1]>>s | x[i]<<ŝ } z[len(z)-1] = x[len(z)-1] >> s return @@ -263,20 +267,6 @@ func divWW(x1, x0, y, m Word) (q, r Word) { return Word(qq), Word(r0 >> s) } -func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { - r = xn - if len(x) == 1 { - qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) - z[0] = Word(qq) - return Word(rr) - } - rec := reciprocalWord(y) - for i := len(z) - 1; i >= 0; i-- { - z[i], r = divWW(r, x[i], y, rec) - } - return r -} - // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1). func reciprocalWord(d1 Word) Word { u := uint(d1 << nlz(d1)) diff --git a/libgo/go/math/big/arith_amd64.go b/libgo/go/math/big/arith_amd64.go index 35164f3..8e4e1a9 100644 --- a/libgo/go/math/big/arith_amd64.go +++ b/libgo/go/math/big/arith_amd64.go @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore_for_gccgo -// +build !math_big_pure_go +//go:build ignore +// +build ignore package big diff --git a/libgo/go/math/big/arith_decl.go b/libgo/go/math/big/arith_decl.go index a14fda3..d80b92b 100644 --- a/libgo/go/math/big/arith_decl.go +++ b/libgo/go/math/big/arith_decl.go @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore -// +build !math_big_pure_go package big diff --git a/libgo/go/math/big/arith_decl_pure.go b/libgo/go/math/big/arith_decl_pure.go index ac78985..cbfd1dd 100644 --- a/libgo/go/math/big/arith_decl_pure.go +++ b/libgo/go/math/big/arith_decl_pure.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//-go:build math_big_pure_go // -build math_big_pure_go package big diff --git a/libgo/go/math/big/arith_decl_s390x.go b/libgo/go/math/big/arith_decl_s390x.go index 712f115..96445b0 100644 --- a/libgo/go/math/big/arith_decl_s390x.go +++ b/libgo/go/math/big/arith_decl_s390x.go @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore -// +build !math_big_pure_go package big diff --git a/libgo/go/math/big/arith_s390x_test.go b/libgo/go/math/big/arith_s390x_test.go index be1ca7d..bda4811 100644 --- a/libgo/go/math/big/arith_s390x_test.go +++ b/libgo/go/math/big/arith_s390x_test.go @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore -// +build s390x,!math_big_pure_go package big diff --git a/libgo/go/math/big/arith_test.go b/libgo/go/math/big/arith_test.go index 2aca0ef..7b3427f 100644 --- a/libgo/go/math/big/arith_test.go +++ b/libgo/go/math/big/arith_test.go @@ -671,3 +671,27 @@ func BenchmarkDivWVW(b *testing.B) { }) } } + +func BenchmarkNonZeroShifts(b *testing.B) { + for _, n := range benchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + x := rndV(n) + s := uint(rand.Int63n(_W-2)) + 1 // avoid 0 and over-large shifts + z := make([]Word, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n * _W)) + b.Run("shrVU", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = shrVU(z, x, s) + } + }) + b.Run("shlVU", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = shlVU(z, x, s) + } + }) + }) + } +} diff --git a/libgo/go/math/big/int.go b/libgo/go/math/big/int.go index 65f3248..7647346 100644 --- a/libgo/go/math/big/int.go +++ b/libgo/go/math/big/int.go @@ -425,7 +425,7 @@ func (z *Int) SetString(s string, base int) (*Int, bool) { return z.setFromScanner(strings.NewReader(s), base) } -// setFromScanner implements SetString given an io.BytesScanner. +// setFromScanner implements SetString given an io.ByteScanner. // For documentation see comments of SetString. func (z *Int) setFromScanner(r io.ByteScanner, base int) (*Int, bool) { if _, _, err := z.scan(r, base); err != nil { diff --git a/libgo/go/math/big/link_test.go b/libgo/go/math/big/link_test.go index 42f9cef..6e33aa5 100644 --- a/libgo/go/math/big/link_test.go +++ b/libgo/go/math/big/link_test.go @@ -42,7 +42,7 @@ func main() {} if err != nil { t.Fatalf("nm: %v, %s", err, nm) } - const want = "runtime.(*Frames).Next" + const want = "runtime.main" if !bytes.Contains(nm, []byte(want)) { // Test the test. t.Errorf("expected symbol %q not found", want) diff --git a/libgo/go/math/big/nat.go b/libgo/go/math/big/nat.go index bbd6c88..140c619 100644 --- a/libgo/go/math/big/nat.go +++ b/libgo/go/math/big/nat.go @@ -631,48 +631,6 @@ func (z nat) mulRange(a, b uint64) nat { return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) } -// q = (x-r)/y, with 0 <= r < y -func (z nat) divW(x nat, y Word) (q nat, r Word) { - m := len(x) - switch { - case y == 0: - panic("division by zero") - case y == 1: - q = z.set(x) // result is x - return - case m == 0: - q = z[:0] // result is 0 - return - } - // m > 0 - z = z.make(m) - r = divWVW(z, 0, x, y) - q = z.norm() - return -} - -func (z nat) div(z2, u, v nat) (q, r nat) { - if len(v) == 0 { - panic("division by zero") - } - - if u.cmp(v) < 0 { - q = z[:0] - r = z2.set(u) - return - } - - if len(v) == 1 { - var r2 Word - q, r2 = z.divW(u, v[0]) - r = z2.setWord(r2) - return - } - - q, r = z.divLarge(z2, u, v) - return -} - // getNat returns a *nat of len n. The contents may not be zero. // The pool holds *nat to avoid allocation when converting to interface{}. func getNat(n int) *nat { @@ -693,276 +651,6 @@ func putNat(x *nat) { var natPool sync.Pool -// q = (uIn-r)/vIn, with 0 <= r < vIn -// Uses z as storage for q, and u as storage for r if possible. -// See Knuth, Volume 2, section 4.3.1, Algorithm D. -// Preconditions: -// len(vIn) >= 2 -// len(uIn) >= len(vIn) -// u must not alias z -func (z nat) divLarge(u, uIn, vIn nat) (q, r nat) { - n := len(vIn) - m := len(uIn) - n - - // D1. - shift := nlz(vIn[n-1]) - // do not modify vIn, it may be used by another goroutine simultaneously - vp := getNat(n) - v := *vp - shlVU(v, vIn, shift) - - // u may safely alias uIn or vIn, the value of uIn is used to set u and vIn was already used - u = u.make(len(uIn) + 1) - u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) - - // z may safely alias uIn or vIn, both values were used already - if alias(z, u) { - z = nil // z is an alias for u - cannot reuse - } - q = z.make(m + 1) - - if n < divRecursiveThreshold { - q.divBasic(u, v) - } else { - q.divRecursive(u, v) - } - putNat(vp) - - q = q.norm() - shrVU(u, u, shift) - r = u.norm() - - return q, r -} - -// divBasic performs word-by-word division of u by v. -// The quotient is written in pre-allocated q. -// The remainder overwrites input u. -// -// Precondition: -// - q is large enough to hold the quotient u / v -// which has a maximum length of len(u)-len(v)+1. -func (q nat) divBasic(u, v nat) { - n := len(v) - m := len(u) - n - - qhatvp := getNat(n + 1) - qhatv := *qhatvp - - // D2. - vn1 := v[n-1] - rec := reciprocalWord(vn1) - for j := m; j >= 0; j-- { - // D3. - qhat := Word(_M) - var ujn Word - if j+n < len(u) { - ujn = u[j+n] - } - if ujn != vn1 { - var rhat Word - qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec) - - // x1 | x2 = q̂v_{n-2} - vn2 := v[n-2] - x1, x2 := mulWW(qhat, vn2) - // test if q̂v_{n-2} > br̂ + u_{j+n-2} - ujn2 := u[j+n-2] - for greaterThan(x1, x2, rhat, ujn2) { - qhat-- - prevRhat := rhat - rhat += vn1 - // v[n-1] >= 0, so this tests for overflow. - if rhat < prevRhat { - break - } - x1, x2 = mulWW(qhat, vn2) - } - } - - // D4. - // Compute the remainder u - (q̂*v) << (_W*j). - // The subtraction may overflow if q̂ estimate was off by one. - qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) - qhl := len(qhatv) - if j+qhl > len(u) && qhatv[n] == 0 { - qhl-- - } - c := subVV(u[j:j+qhl], u[j:], qhatv) - if c != 0 { - c := addVV(u[j:j+n], u[j:], v) - // If n == qhl, the carry from subVV and the carry from addVV - // cancel out and don't affect u[j+n]. - if n < qhl { - u[j+n] += c - } - qhat-- - } - - if j == m && m == len(q) && qhat == 0 { - continue - } - q[j] = qhat - } - - putNat(qhatvp) -} - -const divRecursiveThreshold = 100 - -// divRecursive performs word-by-word division of u by v. -// The quotient is written in pre-allocated z. -// The remainder overwrites input u. -// -// Precondition: -// - len(z) >= len(u)-len(v) -// -// See Burnikel, Ziegler, "Fast Recursive Division", Algorithm 1 and 2. -func (z nat) divRecursive(u, v nat) { - // Recursion depth is less than 2 log2(len(v)) - // Allocate a slice of temporaries to be reused across recursion. - recDepth := 2 * bits.Len(uint(len(v))) - // large enough to perform Karatsuba on operands as large as v - tmp := getNat(3 * len(v)) - temps := make([]*nat, recDepth) - z.clear() - z.divRecursiveStep(u, v, 0, tmp, temps) - for _, n := range temps { - if n != nil { - putNat(n) - } - } - putNat(tmp) -} - -// divRecursiveStep computes the division of u by v. -// - z must be large enough to hold the quotient -// - the quotient will overwrite z -// - the remainder will overwrite u -func (z nat) divRecursiveStep(u, v nat, depth int, tmp *nat, temps []*nat) { - u = u.norm() - v = v.norm() - - if len(u) == 0 { - z.clear() - return - } - n := len(v) - if n < divRecursiveThreshold { - z.divBasic(u, v) - return - } - m := len(u) - n - if m < 0 { - return - } - - // Produce the quotient by blocks of B words. - // Division by v (length n) is done using a length n/2 division - // and a length n/2 multiplication for each block. The final - // complexity is driven by multiplication complexity. - B := n / 2 - - // Allocate a nat for qhat below. - if temps[depth] == nil { - temps[depth] = getNat(n) - } else { - *temps[depth] = temps[depth].make(B + 1) - } - - j := m - for j > B { - // Divide u[j-B:j+n] by vIn. Keep remainder in u - // for next block. - // - // The following property will be used (Lemma 2): - // if u = u1 << s + u0 - // v = v1 << s + v0 - // then floor(u1/v1) >= floor(u/v) - // - // Moreover, the difference is at most 2 if len(v1) >= len(u/v) - // We choose s = B-1 since len(v)-s >= B+1 >= len(u/v) - s := (B - 1) - // Except for the first step, the top bits are always - // a division remainder, so the quotient length is <= n. - uu := u[j-B:] - - qhat := *temps[depth] - qhat.clear() - qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps) - qhat = qhat.norm() - // Adjust the quotient: - // u = u_h << s + u_l - // v = v_h << s + v_l - // u_h = q̂ v_h + rh - // u = q̂ (v - v_l) + rh << s + u_l - // After the above step, u contains a remainder: - // u = rh << s + u_l - // and we need to subtract q̂ v_l - // - // But it may be a bit too large, in which case q̂ needs to be smaller. - qhatv := tmp.make(3 * n) - qhatv.clear() - qhatv = qhatv.mul(qhat, v[:s]) - for i := 0; i < 2; i++ { - e := qhatv.cmp(uu.norm()) - if e <= 0 { - break - } - subVW(qhat, qhat, 1) - c := subVV(qhatv[:s], qhatv[:s], v[:s]) - if len(qhatv) > s { - subVW(qhatv[s:], qhatv[s:], c) - } - addAt(uu[s:], v[s:], 0) - } - if qhatv.cmp(uu.norm()) > 0 { - panic("impossible") - } - c := subVV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv) - if c > 0 { - subVW(uu[len(qhatv):], uu[len(qhatv):], c) - } - addAt(z, qhat, j-B) - j -= B - } - - // Now u < (v<<B), compute lower bits in the same way. - // Choose shift = B-1 again. - s := B - 1 - qhat := *temps[depth] - qhat.clear() - qhat.divRecursiveStep(u[s:].norm(), v[s:], depth+1, tmp, temps) - qhat = qhat.norm() - qhatv := tmp.make(3 * n) - qhatv.clear() - qhatv = qhatv.mul(qhat, v[:s]) - // Set the correct remainder as before. - for i := 0; i < 2; i++ { - if e := qhatv.cmp(u.norm()); e > 0 { - subVW(qhat, qhat, 1) - c := subVV(qhatv[:s], qhatv[:s], v[:s]) - if len(qhatv) > s { - subVW(qhatv[s:], qhatv[s:], c) - } - addAt(u[s:], v[s:], 0) - } - } - if qhatv.cmp(u.norm()) > 0 { - panic("impossible") - } - c := subVV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv) - if c > 0 { - c = subVW(u[len(qhatv):], u[len(qhatv):], c) - } - if c > 0 { - panic("impossible") - } - - // Done! - addAt(z, qhat.norm(), 0) -} - // Length of x in bits. x must be normalized. func (x nat) bitLen() int { if i := len(x) - 1; i >= 0 { @@ -1170,19 +858,6 @@ func (z nat) xor(x, y nat) nat { return z.norm() } -// greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) -func greaterThan(x1, x2, y1, y2 Word) bool { - return x1 > y1 || x1 == y1 && x2 > y2 -} - -// modW returns x % d. -func (x nat) modW(d Word) (r Word) { - // TODO(agl): we don't actually need to store the q value. - var q nat - q = q.make(len(x)) - return divWVW(q, 0, x, d) -} - // random creates a random integer in [0..limit), using the space in z if // possible. n is the bit length of limit. func (z nat) random(rand *rand.Rand, limit nat, n int) nat { diff --git a/libgo/go/math/big/natdiv.go b/libgo/go/math/big/natdiv.go new file mode 100644 index 0000000..882bb6d --- /dev/null +++ b/libgo/go/math/big/natdiv.go @@ -0,0 +1,884 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + +Multi-precision division. Here be dragons. + +Given u and v, where u is n+m digits, and v is n digits (with no leading zeros), +the goal is to return quo, rem such that u = quo*v + rem, where 0 ≤ rem < v. +That is, quo = ⌊u/v⌋ where ⌊x⌋ denotes the floor (truncation to integer) of x, +and rem = u - quo·v. + + +Long Division + +Division in a computer proceeds the same as long division in elementary school, +but computers are not as good as schoolchildren at following vague directions, +so we have to be much more precise about the actual steps and what can happen. + +We work from most to least significant digit of the quotient, doing: + + • Guess a digit q, the number of v to subtract from the current + section of u to zero out the topmost digit. + • Check the guess by multiplying q·v and comparing it against + the current section of u, adjusting the guess as needed. + • Subtract q·v from the current section of u. + • Add q to the corresponding section of the result quo. + +When all digits have been processed, the final remainder is left in u +and returned as rem. + +For example, here is a sketch of dividing 5 digits by 3 digits (n=3, m=2). + + q₂ q₁ q₀ + _________________ + v₂ v₁ v₀ ) u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ | | + [u₄ u₃ u₂]| | + - [ q₂·v ]| | + ----------- ↓ | + [ rem | u₁]| + - [ q₁·v ]| + ----------- ↓ + [ rem | u₀] + - [ q₀·v ] + ------------ + [ rem ] + +Instead of creating new storage for the remainders and copying digits from u +as indicated by the arrows, we use u's storage directly as both the source +and destination of the subtractions, so that the remainders overwrite +successive overlapping sections of u as the division proceeds, using a slice +of u to identify the current section. This avoids all the copying as well as +shifting of remainders. + +Division of u with n+m digits by v with n digits (in base B) can in general +produce at most m+1 digits, because: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^(n-1) [B^(n-1) is the smallest n-digit number] + • u/v < B^(n+m) / B^(n-1) [divide bounds for u, v] + • u/v < B^(m+1) [simplify] + +The first step is special: it takes the top n digits of u and divides them by +the n digits of v, producing the first quotient digit and an n-digit remainder. +In the example, q₂ = ⌊u₄u₃u₂ / v⌋. + +The first step divides n digits by n digits to ensure that it produces only a +single digit. + +Each subsequent step appends the next digit from u to the remainder and divides +those n+1 digits by the n digits of v, producing another quotient digit and a +new n-digit remainder. + +Subsequent steps divide n+1 digits by n digits, an operation that in general +might produce two digits. However, as used in the algorithm, that division is +guaranteed to produce only a single digit. The dividend is of the form +rem·B + d, where rem is a remainder from the previous step and d is a single +digit, so: + + • rem ≤ v - 1 [rem is a remainder from dividing by v] + • rem·B ≤ v·B - B [multiply by B] + • d ≤ B - 1 [d is a single digit] + • rem·B + d ≤ v·B - 1 [add] + • rem·B + d < v·B [change ≤ to <] + • (rem·B + d)/v < B [divide by v] + + +Guess and Check + +At each step we need to divide n+1 digits by n digits, but this is for the +implementation of division by n digits, so we can't just invoke a division +routine: we _are_ the division routine. Instead, we guess at the answer and +then check it using multiplication. If the guess is wrong, we correct it. + +How can this guessing possibly be efficient? It turns out that the following +statement (let's call it the Good Guess Guarantee) is true. + +If + + • q = ⌊u/v⌋ where u is n+1 digits and v is n digits, + • q < B, and + • the topmost digit of v = vₙ₋₁ ≥ B/2, + +then q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ satisfies q ≤ q̂ ≤ q+2. (Proof below.) + +That is, if we know the answer has only a single digit and we guess an answer +by ignoring the bottom n-1 digits of u and v, using a 2-by-1-digit division, +then that guess is at least as large as the correct answer. It is also not +too much larger: it is off by at most two from the correct answer. + +Note that in the first step of the overall division, which is an n-by-n-digit +division, the 2-by-1 guess uses an implicit uₙ = 0. + +Note that using a 2-by-1-digit division here does not mean calling ourselves +recursively. Instead, we use an efficient direct hardware implementation of +that operation. + +Note that because q is u/v rounded down, q·v must not exceed u: u ≥ q·v. +If a guess q̂ is too big, it will not satisfy this test. Viewed a different way, +the remainder r̂ for a given q̂ is u - q̂·v, which must be positive. If it is +negative, then the guess q̂ is too big. + +This gives us a way to compute q. First compute q̂ with 2-by-1-digit division. +Then, while u < q̂·v, decrement q̂; this loop executes at most twice, because +q̂ ≤ q+2. + + +Scaling Inputs + +The Good Guess Guarantee requires that the top digit of v (vₙ₋₁) be at least B/2. +For example in base 10, ⌊172/19⌋ = 9, but ⌊18/1⌋ = 18: the guess is wildly off +because the first digit 1 is smaller than B/2 = 5. + +We can ensure that v has a large top digit by multiplying both u and v by the +right amount. Continuing the example, if we multiply both 172 and 19 by 3, we +now have ⌊516/57⌋, the leading digit of v is now ≥ 5, and sure enough +⌊51/5⌋ = 10 is much closer to the correct answer 9. It would be easier here +to multiply by 4, because that can be done with a shift. Specifically, we can +always count the number of leading zeros i in the first digit of v and then +shift both u and v left by i bits. + +Having scaled u and v, the value ⌊u/v⌋ is unchanged, but the remainder will +be scaled: 172 mod 19 is 1, but 516 mod 57 is 3. We have to divide the remainder +by the scaling factor (shifting right i bits) when we finish. + +Note that these shifts happen before and after the entire division algorithm, +not at each step in the per-digit iteration. + +Note the effect of scaling inputs on the size of the possible quotient. +In the scaled u/v, u can gain a digit from scaling; v never does, because we +pick the scaling factor to make v's top digit larger but without overflowing. +If u and v have n+m and n digits after scaling, then: + + • u < B^(n+m) [B^(n+m) has n+m+1 digits] + • v ≥ B^n / 2 [vₙ₋₁ ≥ B/2, so vₙ₋₁·B^(n-1) ≥ B^n/2] + • u/v < B^(n+m) / (B^n / 2) [divide bounds for u, v] + • u/v < 2 B^m [simplify] + +The quotient can still have m+1 significant digits, but if so the top digit +must be a 1. This provides a different way to handle the first digit of the +result: compare the top n digits of u against v and fill in either a 0 or a 1. + + +Refining Guesses + +Before we check whether u < q̂·v, we can adjust our guess to change it from +q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ into the refined guess ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋. +Although not mentioned above, the Good Guess Guarantee also promises that this +3-by-2-digit division guess is more precise and at most one away from the real +answer q. The improvement from the 2-by-1 to the 3-by-2 guess can also be done +without n-digit math. + +If we have a guess q̂ = ⌊uₙuₙ₋₁ / vₙ₋₁⌋ and we want to see if it also equal to +⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, we can use the same check we would for the full division: +if uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂, then the guess is too large and should be reduced. + +Checking uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ < 0, +and + + uₙuₙ₋₁uₙ₋₂ - q̂·vₙ₋₁vₙ₋₂ = (uₙuₙ₋₁·B + uₙ₋₂) - q̂·(vₙ₋₁·B + vₙ₋₂) + [splitting off the bottom digit] + = (uₙuₙ₋₁ - q̂·vₙ₋₁)·B + uₙ₋₂ - q̂·vₙ₋₂ + [regrouping] + +The expression (uₙuₙ₋₁ - q̂·vₙ₋₁) is the remainder of uₙuₙ₋₁ / vₙ₋₁. +If the initial guess returns both q̂ and its remainder r̂, then checking +whether uₙuₙ₋₁uₙ₋₂ < q̂·vₙ₋₁vₙ₋₂ is the same as checking r̂·B + uₙ₋₂ < q̂·vₙ₋₂. + +If we find that r̂·B + uₙ₋₂ < q̂·vₙ₋₂, then we can adjust the guess by +decrementing q̂ and adding vₙ₋₁ to r̂. We repeat until r̂·B + uₙ₋₂ ≥ q̂·vₙ₋₂. +(As before, this fixup is only needed at most twice.) + +Now that q̂ = ⌊uₙuₙ₋₁uₙ₋₂ / vₙ₋₁vₙ₋₂⌋, as mentioned above it is at most one +away from the correct q, and we've avoided doing any n-digit math. +(If we need the new remainder, it can be computed as r̂·B + uₙ₋₂ - q̂·vₙ₋₂.) + +The final check u < q̂·v and the possible fixup must be done at full precision. +For random inputs, a fixup at this step is exceedingly rare: the 3-by-2 guess +is not often wrong at all. But still we must do the check. Note that since the +3-by-2 guess is off by at most 1, it can be convenient to perform the final +u < q̂·v as part of the computation of the remainder r = u - q̂·v. If the +subtraction underflows, decremeting q̂ and adding one v back to r is enough to +arrive at the final q, r. + +That's the entirety of long division: scale the inputs, and then loop over +each output position, guessing, checking, and correcting the next output digit. + +For a 2n-digit number divided by an n-digit number (the worst size-n case for +division complexity), this algorithm uses n+1 iterations, each of which must do +at least the 1-by-n-digit multiplication q̂·v. That's O(n) iterations of +O(n) time each, so O(n²) time overall. + + +Recursive Division + +For very large inputs, it is possible to improve on the O(n²) algorithm. +Let's call a group of n/2 real digits a (very) “wide digit”. We can run the +standard long division algorithm explained above over the wide digits instead of +the actual digits. This will result in many fewer steps, but the math involved in +each step is more work. + +Where basic long division uses a 2-by-1-digit division to guess the initial q̂, +the new algorithm must use a 2-by-1-wide-digit division, which is of course +really an n-by-n/2-digit division. That's OK: if we implement n-digit division +in terms of n/2-digit division, the recursion will terminate when the divisor +becomes small enough to handle with standard long division or even with the +2-by-1 hardware instruction. + +For example, here is a sketch of dividing 10 digits by 4, proceeding with +wide digits corresponding to two regular digits. The first step, still special, +must leave off a (regular) digit, dividing 5 by 4 and producing a 4-digit +remainder less than v. The middle steps divide 6 digits by 4, guaranteed to +produce two output digits each (one wide digit) with 4-digit remainders. +The final step must use what it has: the 4-digit remainder plus one more, +5 digits to divide by 4. + + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ ↓ | | | | | + [u₉ u₈ u₇ u₆ u₅]| | | | | + - [ q₆q₅·v ]| | | | | + ----------------- ↓ ↓ | | | + [ rem |u₄ u₃]| | | + - [ q₄q₃·v ]| | | + -------------------- ↓ ↓ | + [ rem |u₂ u₁]| + - [ q₂q₁·v ]| + -------------------- ↓ + [ rem |u₀] + - [ q₀·v ] + ------------------ + [ rem ] + +An alternative would be to look ahead to how well n/2 divides into n+m and +adjust the first step to use fewer digits as needed, making the first step +more special to make the last step not special at all. For example, using the +same input, we could choose to use only 4 digits in the first step, leaving +a full wide digit for the last step: + + q₆ q₅ q₄ q₃ q₂ q₁ q₀ + _______________________________ + v₃ v₂ v₁ v₀ ) u₉ u₈ u₇ u₆ u₅ u₄ u₃ u₂ u₁ u₀ + ↓ ↓ ↓ ↓ | | | | | | + [u₉ u₈ u₇ u₆]| | | | | | + - [ q₆·v ]| | | | | | + -------------- ↓ ↓ | | | | + [ rem |u₅ u₄]| | | | + - [ q₅q₄·v ]| | | | + -------------------- ↓ ↓ | | + [ rem |u₃ u₂]| | + - [ q₃q₂·v ]| | + -------------------- ↓ ↓ + [ rem |u₁ u₀] + - [ q₁q₀·v ] + --------------------- + [ rem ] + +Today, the code in divRecursiveStep works like the first example. Perhaps in +the future we will make it work like the alternative, to avoid a special case +in the final iteration. + +Either way, each step is a 3-by-2-wide-digit division approximated first by +a 2-by-1-wide-digit division, just as we did for regular digits in long division. +Because the actual answer we want is a 3-by-2-wide-digit division, instead of +multiplying q̂·v directly during the fixup, we can use the quick refinement +from long division (an n/2-by-n/2 multiply) to correct q to its actual value +and also compute the remainder (as mentioned above), and then stop after that, +never doing a full n-by-n multiply. + +Instead of using an n-by-n/2-digit division to produce n/2 digits, we can add +(not discard) one more real digit, doing an (n+1)-by-(n/2+1)-digit division that +produces n/2+1 digits. That single extra digit tightens the Good Guess Guarantee +to q ≤ q̂ ≤ q+1 and lets us drop long division's special treatment of the first +digit. These benefits are discussed more after the Good Guess Guarantee proof +below. + + +How Fast is Recursive Division? + +For a 2n-by-n-digit division, this algorithm runs a 4-by-2 long division over +wide digits, producing two wide digits plus a possible leading regular digit 1, +which can be handled without a recursive call. That is, the algorithm uses two +full iterations, each using an n-by-n/2-digit division and an n/2-by-n/2-digit +multiplication, along with a few n-digit additions and subtractions. The standard +n-by-n-digit multiplication algorithm requires O(n²) time, making the overall +algorithm require time T(n) where + + T(n) = 2T(n/2) + O(n) + O(n²) + +which, by the Bentley-Haken-Saxe theorem, ends up reducing to T(n) = O(n²). +This is not an improvement over regular long division. + +When the number of digits n becomes large enough, Karatsuba's algorithm for +multiplication can be used instead, which takes O(n^log₂3) = O(n^1.6) time. +(Karatsuba multiplication is implemented in func karatsuba in nat.go.) +That makes the overall recursive division algorithm take O(n^1.6) time as well, +which is an improvement, but again only for large enough numbers. + +It is not critical to make sure that every recursion does only two recursive +calls. While in general the number of recursive calls can change the time +analysis, in this case doing three calls does not change the analysis: + + T(n) = 3T(n/2) + O(n) + O(n^log₂3) + +ends up being T(n) = O(n^log₂3). Because the Karatsuba multiplication taking +time O(n^log₂3) is itself doing 3 half-sized recursions, doing three for the +division does not hurt the asymptotic performance. Of course, it is likely +still faster in practice to do two. + + +Proof of the Good Guess Guarantee + +Given numbers x, y, let us break them into the quotients and remainders when +divided by some scaling factor S, with the added constraints that the quotient +x/y and the high part of y are both less than some limit T, and that the high +part of y is at least half as big as T. + + x₁ = ⌊x/S⌋ y₁ = ⌊y/S⌋ + x₀ = x mod S y₀ = y mod S + + x = x₁·S + x₀ 0 ≤ x₀ < S x/y < T + y = y₁·S + y₀ 0 ≤ y₀ < S T/2 ≤ y₁ < T + +And consider the two truncated quotients: + + q = ⌊x/y⌋ + q̂ = ⌊x₁/y₁⌋ + +We will prove that q ≤ q̂ ≤ q+2. + +The guarantee makes no real demands on the scaling factor S: it is simply the +magnitude of the digits cut from both x and y to produce x₁ and y₁. +The guarantee makes only limited demands on T: it must be large enough to hold +the quotient x/y, and y₁ must have roughly the same size. + +To apply to the earlier discussion of 2-by-1 guesses in long division, +we would choose: + + S = Bⁿ⁻¹ + T = B + x = u + x₁ = uₙuₙ₋₁ + x₀ = uₙ₋₂...u₀ + y = v + y₁ = vₙ₋₁ + y₀ = vₙ₋₂...u₀ + +These simpler variables avoid repeating those longer expressions in the proof. + +Note also that, by definition, truncating division ⌊x/y⌋ satisfies + + x/y - 1 < ⌊x/y⌋ ≤ x/y. + +This fact will be used a few times in the proofs. + +Proof that q ≤ q̂: + + q̂·y₁ = ⌊x₁/y₁⌋·y₁ [by definition, q̂ = ⌊x₁/y₁⌋] + > (x₁/y₁ - 1)·y₁ [x₁/y₁ - 1 < ⌊x₁/y₁⌋] + = x₁ - y₁ [distribute y₁] + + So q̂·y₁ > x₁ - y₁. + Since q̂·y₁ is an integer, q̂·y₁ ≥ x₁ - y₁ + 1. + + q̂ - q = q̂ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≥ q̂ - x/y [⌊x/y⌋ < x/y] + = (1/y)·(q̂·y - x) [factor out 1/y] + ≥ (1/y)·(q̂·y₁·S - x) [y = y₁·S + y₀ ≥ y₁·S] + ≥ (1/y)·((x₁ - y₁ + 1)·S - x) [above: q̂·y₁ ≥ x₁ - y₁ + 1] + = (1/y)·(x₁·S - y₁·S + S - x) [distribute S] + = (1/y)·(S - x₀ - y₁·S) [-x = -x₁·S - x₀] + > -y₁·S / y [x₀ < S, so S - x₀ < 0; drop it] + ≥ -1 [y₁·S ≤ y] + + So q̂ - q > -1. + Since q̂ - q is an integer, q̂ - q ≥ 0, or equivalently q ≤ q̂. + +Proof that q̂ ≤ q+2: + + x₁/y₁ - x/y = x₁·S/y₁·S - x/y [multiply left term by S/S] + ≤ x/y₁·S - x/y [x₁S ≤ x] + = (x/y)·(y/y₁·S - 1) [factor out x/y] + = (x/y)·((y - y₁·S)/y₁·S) [move -1 into y/y₁·S fraction] + = (x/y)·(y₀/y₁·S) [y - y₁·S = y₀] + = (x/y)·(1/y₁)·(y₀/S) [factor out 1/y₁] + < (x/y)·(1/y₁) [y₀ < S, so y₀/S < 1] + ≤ (x/y)·(2/T) [y₁ ≥ T/2, so 1/y₁ ≤ 2/T] + < T·(2/T) [x/y < T] + = 2 [T·(2/T) = 2] + + So x₁/y₁ - x/y < 2. + + q̂ - q = ⌊x₁/y₁⌋ - q [by definition, q̂ = ⌊x₁/y₁⌋] + = ⌊x₁/y₁⌋ - ⌊x/y⌋ [by definition, q = ⌊x/y⌋] + ≤ x₁/y₁ - ⌊x/y⌋ [⌊x₁/y₁⌋ ≤ x₁/y₁] + < x₁/y₁ - (x/y - 1) [⌊x/y⌋ > x/y - 1] + = (x₁/y₁ - x/y) + 1 [regrouping] + < 2 + 1 [above: x₁/y₁ - x/y < 2] + = 3 + + So q̂ - q < 3. + Since q̂ - q is an integer, q̂ - q ≤ 2. + +Note that when x/y < T/2, the bounds tighten to x₁/y₁ - x/y < 1 and therefore +q̂ - q ≤ 1. + +Note also that in the general case 2n-by-n division where we don't know that +x/y < T, we do know that x/y < 2T, yielding the bound q̂ - q ≤ 4. So we could +remove the special case first step of long division as long as we allow the +first fixup loop to run up to four times. (Using a simple comparison to decide +whether the first digit is 0 or 1 is still more efficient, though.) + +Finally, note that when dividing three leading base-B digits by two (scaled), +we have T = B² and x/y < B = T/B, a much tighter bound than x/y < T. +This in turn yields the much tighter bound x₁/y₁ - x/y < 2/B. This means that +⌊x₁/y₁⌋ and ⌊x/y⌋ can only differ when x/y is less than 2/B greater than an +integer. For random x and y, the chance of this is 2/B, or, for large B, +approximately zero. This means that after we produce the 3-by-2 guess in the +long division algorithm, the fixup loop essentially never runs. + +In the recursive algorithm, the extra digit in (2·⌊n/2⌋+1)-by-(⌊n/2⌋+1)-digit +division has exactly the same effect: the probability of needing a fixup is the +same 2/B. Even better, we can allow the general case x/y < 2T and the fixup +probability only grows to 4/B, still essentially zero. + + +References + +There are no great references for implementing long division; thus this comment. +Here are some notes about what to expect from the obvious references. + +Knuth Volume 2 (Seminumerical Algorithms) section 4.3.1 is the usual canonical +reference for long division, but that entire series is highly compressed, never +repeating a necessary fact and leaving important insights to the exercises. +For example, no rationale whatsoever is given for the calculation that extends +q̂ from a 2-by-1 to a 3-by-2 guess, nor why it reduces the error bound. +The proof that the calculation even has the desired effect is left to exercises. +The solutions to those exercises provided at the back of the book are entirely +calculations, still with no explanation as to what is going on or how you would +arrive at the idea of doing those exact calculations. Nowhere is it mentioned +that this test extends the 2-by-1 guess into a 3-by-2 guess. The proof of the +Good Guess Guarantee is only for the 2-by-1 guess and argues by contradiction, +making it difficult to understand how modifications like adding another digit +or adjusting the quotient range affects the overall bound. + +All that said, Knuth remains the canonical reference. It is dense but packed +full of information and references, and the proofs are simpler than many other +presentations. The proofs above are reworkings of Knuth's to remove the +arguments by contradiction and add explanations or steps that Knuth omitted. +But beware of errors in older printings. Take the published errata with you. + +Brinch Hansen's “Multiple-length Division Revisited: a Tour of the Minefield” +starts with a blunt critique of Knuth's presentation (among others) and then +presents a more detailed and easier to follow treatment of long division, +including an implementation in Pascal. But the algorithm and implementation +work entirely in terms of 3-by-2 division, which is much less useful on modern +hardware than an algorithm using 2-by-1 division. The proofs are a bit too +focused on digit counting and seem needlessly complex, especially compared to +the ones given above. + +Burnikel and Ziegler's “Fast Recursive Division” introduced the key insight of +implementing division by an n-digit divisor using recursive calls to division +by an n/2-digit divisor, relying on Karatsuba multiplication to yield a +sub-quadratic run time. However, the presentation decisions are made almost +entirely for the purpose of simplifying the run-time analysis, rather than +simplifying the presentation. Instead of a single algorithm that loops over +quotient digits, the paper presents two mutually-recursive algorithms, for +2n-by-n and 3n-by-2n. The paper also does not present any general (n+m)-by-n +algorithm. + +The proofs in the paper are remarkably complex, especially considering that +the algorithm is at its core just long division on wide digits, so that the +usual long division proofs apply essentially unaltered. +*/ + +package big + +import "math/bits" + +// div returns q, r such that q = ⌊u/v⌋ and r = u%v = u - q·v. +// It uses z and z2 as the storage for q and r. +func (z nat) div(z2, u, v nat) (q, r nat) { + if len(v) == 0 { + panic("division by zero") + } + + if u.cmp(v) < 0 { + q = z[:0] + r = z2.set(u) + return + } + + if len(v) == 1 { + // Short division: long optimized for a single-word divisor. + // In that case, the 2-by-1 guess is all we need at each step. + var r2 Word + q, r2 = z.divW(u, v[0]) + r = z2.setWord(r2) + return + } + + q, r = z.divLarge(z2, u, v) + return +} + +// divW returns q, r such that q = ⌊x/y⌋ and r = x%y = x - q·y. +// It uses z as the storage for q. +// Note that y is a single digit (Word), not a big number. +func (z nat) divW(x nat, y Word) (q nat, r Word) { + m := len(x) + switch { + case y == 0: + panic("division by zero") + case y == 1: + q = z.set(x) // result is x + return + case m == 0: + q = z[:0] // result is 0 + return + } + // m > 0 + z = z.make(m) + r = divWVW(z, 0, x, y) + q = z.norm() + return +} + +// modW returns x % d. +func (x nat) modW(d Word) (r Word) { + // TODO(agl): we don't actually need to store the q value. + var q nat + q = q.make(len(x)) + return divWVW(q, 0, x, d) +} + +// divWVW overwrites z with ⌊x/y⌋, returning the remainder r. +// The caller must ensure that len(z) = len(x). +func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { + r = xn + if len(x) == 1 { + qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) + z[0] = Word(qq) + return Word(rr) + } + rec := reciprocalWord(y) + for i := len(z) - 1; i >= 0; i-- { + z[i], r = divWW(r, x[i], y, rec) + } + return r +} + +// div returns q, r such that q = ⌊uIn/vIn⌋ and r = uIn%vIn = uIn - q·vIn. +// It uses z and u as the storage for q and r. +// The caller must ensure that len(vIn) ≥ 2 (use divW otherwise) +// and that len(uIn) ≥ len(vIn) (the answer is 0, uIn otherwise). +func (z nat) divLarge(u, uIn, vIn nat) (q, r nat) { + n := len(vIn) + m := len(uIn) - n + + // Scale the inputs so vIn's top bit is 1 (see “Scaling Inputs” above). + // vIn is treated as a read-only input (it may be in use by another + // goroutine), so we must make a copy. + // uIn is copied to u. + shift := nlz(vIn[n-1]) + vp := getNat(n) + v := *vp + shlVU(v, vIn, shift) + u = u.make(len(uIn) + 1) + u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) + + // The caller should not pass aliased z and u, since those are + // the two different outputs, but correct just in case. + if alias(z, u) { + z = nil + } + q = z.make(m + 1) + + // Use basic or recursive long division depending on size. + if n < divRecursiveThreshold { + q.divBasic(u, v) + } else { + q.divRecursive(u, v) + } + putNat(vp) + + q = q.norm() + + // Undo scaling of remainder. + shrVU(u, u, shift) + r = u.norm() + + return q, r +} + +// divBasic implements long division as described above. +// It overwrites q with ⌊u/v⌋ and overwrites u with the remainder r. +// q must be large enough to hold ⌊u/v⌋. +func (q nat) divBasic(u, v nat) { + n := len(v) + m := len(u) - n + + qhatvp := getNat(n + 1) + qhatv := *qhatvp + + // Set up for divWW below, precomputing reciprocal argument. + vn1 := v[n-1] + rec := reciprocalWord(vn1) + + // Compute each digit of quotient. + for j := m; j >= 0; j-- { + // Compute the 2-by-1 guess q̂. + // The first iteration must invent a leading 0 for u. + qhat := Word(_M) + var ujn Word + if j+n < len(u) { + ujn = u[j+n] + } + + // ujn ≤ vn1, or else q̂ would be more than one digit. + // For ujn == vn1, we set q̂ to the max digit M above. + // Otherwise, we compute the 2-by-1 guess. + if ujn != vn1 { + var rhat Word + qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec) + + // Refine q̂ to a 3-by-2 guess. See “Refining Guesses” above. + vn2 := v[n-2] + x1, x2 := mulWW(qhat, vn2) + ujn2 := u[j+n-2] + for greaterThan(x1, x2, rhat, ujn2) { // x1x2 > r̂ u[j+n-2] + qhat-- + prevRhat := rhat + rhat += vn1 + // If r̂ overflows, then + // r̂ u[j+n-2]v[n-1] is now definitely > x1 x2. + if rhat < prevRhat { + break + } + // TODO(rsc): No need for a full mulWW. + // x2 += vn2; if x2 overflows, x1++ + x1, x2 = mulWW(qhat, vn2) + } + } + + // Compute q̂·v. + qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) + qhl := len(qhatv) + if j+qhl > len(u) && qhatv[n] == 0 { + qhl-- + } + + // Subtract q̂·v from the current section of u. + // If it underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back. + c := subVV(u[j:j+qhl], u[j:], qhatv) + if c != 0 { + c := addVV(u[j:j+n], u[j:], v) + // If n == qhl, the carry from subVV and the carry from addVV + // cancel out and don't affect u[j+n]. + if n < qhl { + u[j+n] += c + } + qhat-- + } + + // Save quotient digit. + // Caller may know the top digit is zero and not leave room for it. + if j == m && m == len(q) && qhat == 0 { + continue + } + q[j] = qhat + } + + putNat(qhatvp) +} + +// greaterThan reports whether the two digit numbers x1 x2 > y1 y2. +// TODO(rsc): In contradiction to most of this file, x1 is the high +// digit and x2 is the low digit. This should be fixed. +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} + +// divRecursiveThreshold is the number of divisor digits +// at which point divRecursive is faster than divBasic. +const divRecursiveThreshold = 100 + +// divRecursive implements recursive division as described above. +// It overwrites z with ⌊u/v⌋ and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// This function is just for allocating and freeing temporaries +// around divRecursiveStep, the real implementation. +func (z nat) divRecursive(u, v nat) { + // Recursion depth is (much) less than 2 log₂(len(v)). + // Allocate a slice of temporaries to be reused across recursion, + // plus one extra temporary not live across the recursion. + recDepth := 2 * bits.Len(uint(len(v))) + tmp := getNat(3 * len(v)) + temps := make([]*nat, recDepth) + + z.clear() + z.divRecursiveStep(u, v, 0, tmp, temps) + + // Free temporaries. + for _, n := range temps { + if n != nil { + putNat(n) + } + } + putNat(tmp) +} + +// divRecursiveStep is the actual implementation of recursive division. +// It adds ⌊u/v⌋ to z and overwrites u with the remainder r. +// z must be large enough to hold ⌊u/v⌋. +// It uses temps[depth] (allocating if needed) as a temporary live across +// the recursive call. It also uses tmp, but not live across the recursion. +func (z nat) divRecursiveStep(u, v nat, depth int, tmp *nat, temps []*nat) { + // u is a subsection of the original and may have leading zeros. + // TODO(rsc): The v = v.norm() is useless and should be removed. + // We know (and require) that v's top digit is ≥ B/2. + u = u.norm() + v = v.norm() + if len(u) == 0 { + z.clear() + return + } + + // Fall back to basic division if the problem is now small enough. + n := len(v) + if n < divRecursiveThreshold { + z.divBasic(u, v) + return + } + + // Nothing to do if u is shorter than v (implies u < v). + m := len(u) - n + if m < 0 { + return + } + + // We consider B digits in a row as a single wide digit. + // (See “Recursive Division” above.) + // + // TODO(rsc): rename B to Wide, to avoid confusion with _B, + // which is something entirely different. + // TODO(rsc): Look into whether using ⌈n/2⌉ is better than ⌊n/2⌋. + B := n / 2 + + // Allocate a nat for qhat below. + if temps[depth] == nil { + temps[depth] = getNat(n) // TODO(rsc): Can be just B+1. + } else { + *temps[depth] = temps[depth].make(B + 1) + } + + // Compute each wide digit of the quotient. + // + // TODO(rsc): Change the loop to be + // for j := (m+B-1)/B*B; j > 0; j -= B { + // which will make the final step a regular step, letting us + // delete what amounts to an extra copy of the loop body below. + j := m + for j > B { + // Divide u[j-B:j+n] (3 wide digits) by v (2 wide digits). + // First make the 2-by-1-wide-digit guess using a recursive call. + // Then extend the guess to the full 3-by-2 (see “Refining Guesses”). + // + // For the 2-by-1-wide-digit guess, instead of doing 2B-by-B-digit, + // we use a (2B+1)-by-(B+1) digit, which handles the possibility that + // the result has an extra leading 1 digit as well as guaranteeing + // that the computed q̂ will be off by at most 1 instead of 2. + + // s is the number of digits to drop from the 3B- and 2B-digit chunks. + // We drop B-1 to be left with 2B+1 and B+1. + s := (B - 1) + + // uu is the up-to-3B-digit section of u we are working on. + uu := u[j-B:] + + // Compute the 2-by-1 guess q̂, leaving r̂ in uu[s:B+n]. + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + + // Extend to a 3-by-2 quotient and remainder. + // Because divRecursiveStep overwrote the top part of uu with + // the remainder r̂, the full uu already contains the equivalent + // of r̂·B + uₙ₋₂ from the “Refining Guesses” discussion. + // Subtracting q̂·vₙ₋₂ from it will compute the full-length remainder. + // If that subtraction underflows, q̂·v > u, which we fix up + // by decrementing q̂ and adding v back, same as in long division. + + // TODO(rsc): Instead of subtract and fix-up, this code is computing + // q̂·vₙ₋₂ and decrementing q̂ until that product is ≤ u. + // But we can do the subtraction directly, as in the comment above + // and in long division, because we know that q̂ is wrong by at most one. + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + for i := 0; i < 2; i++ { + e := qhatv.cmp(uu.norm()) + if e <= 0 { + break + } + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(uu[s:], v[s:], 0) + } + if qhatv.cmp(uu.norm()) > 0 { + panic("impossible") + } + c := subVV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv) + if c > 0 { + subVW(uu[len(qhatv):], uu[len(qhatv):], c) + } + addAt(z, qhat, j-B) + j -= B + } + + // TODO(rsc): Rewrite loop as described above and delete all this code. + + // Now u < (v<<B), compute lower bits in the same way. + // Choose shift = B-1 again. + s := B - 1 + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(u[s:].norm(), v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + // Set the correct remainder as before. + for i := 0; i < 2; i++ { + if e := qhatv.cmp(u.norm()); e > 0 { + subVW(qhat, qhat, 1) + c := subVV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + subVW(qhatv[s:], qhatv[s:], c) + } + addAt(u[s:], v[s:], 0) + } + } + if qhatv.cmp(u.norm()) > 0 { + panic("impossible") + } + c := subVV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv) + if c > 0 { + c = subVW(u[len(qhatv):], u[len(qhatv):], c) + } + if c > 0 { + panic("impossible") + } + + // Done! + addAt(z, qhat.norm(), 0) +} diff --git a/libgo/go/math/bits/bits.go b/libgo/go/math/bits/bits.go index 0bfe90c..d8fd6ae 100644 --- a/libgo/go/math/bits/bits.go +++ b/libgo/go/math/bits/bits.go @@ -8,7 +8,7 @@ // functions for the predeclared unsigned integer types. package bits -const uintSize = 32 << (^uint(0) >> 32 & 1) // 32 or 64 +const uintSize = 32 << (^uint(0) >> 63) // 32 or 64 // UintSize is the size of a uint in bits. const UintSize = uintSize diff --git a/libgo/go/math/bits/bits_errors.go b/libgo/go/math/bits/bits_errors.go index f174bd1..53dc113 100644 --- a/libgo/go/math/bits/bits_errors.go +++ b/libgo/go/math/bits/bits_errors.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !compiler_bootstrap // +build !compiler_bootstrap package bits diff --git a/libgo/go/math/bits/bits_errors_bootstrap.go b/libgo/go/math/bits/bits_errors_bootstrap.go index 5df5738..4d610d3 100644 --- a/libgo/go/math/bits/bits_errors_bootstrap.go +++ b/libgo/go/math/bits/bits_errors_bootstrap.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build compiler_bootstrap // +build compiler_bootstrap // This version used only for bootstrap (on this path we want diff --git a/libgo/go/math/bits/bits_tables.go b/libgo/go/math/bits/bits_tables.go index f1e15a0..f869b8d 100644 --- a/libgo/go/math/bits/bits_tables.go +++ b/libgo/go/math/bits/bits_tables.go @@ -6,78 +6,74 @@ package bits -var ntz8tab = [256]uint8{ - 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, -} +const ntz8tab = "" + + "\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" + + "\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" -var pop8tab = [256]uint8{ - 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, - 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, - 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, - 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, - 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, - 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, - 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, - 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08, -} +const pop8tab = "" + + "\x00\x01\x01\x02\x01\x02\x02\x03\x01\x02\x02\x03\x02\x03\x03\x04" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x01\x02\x02\x03\x02\x03\x03\x04\x02\x03\x03\x04\x03\x04\x04\x05" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x02\x03\x03\x04\x03\x04\x04\x05\x03\x04\x04\x05\x04\x05\x05\x06" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x03\x04\x04\x05\x04\x05\x05\x06\x04\x05\x05\x06\x05\x06\x06\x07" + + "\x04\x05\x05\x06\x05\x06\x06\x07\x05\x06\x06\x07\x06\x07\x07\x08" -var rev8tab = [256]uint8{ - 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, - 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, - 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, - 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, - 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, - 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, - 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, - 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, - 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, - 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, - 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, - 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, - 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, - 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, - 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, - 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, -} +const rev8tab = "" + + "\x00\x80\x40\xc0\x20\xa0\x60\xe0\x10\x90\x50\xd0\x30\xb0\x70\xf0" + + "\x08\x88\x48\xc8\x28\xa8\x68\xe8\x18\x98\x58\xd8\x38\xb8\x78\xf8" + + "\x04\x84\x44\xc4\x24\xa4\x64\xe4\x14\x94\x54\xd4\x34\xb4\x74\xf4" + + "\x0c\x8c\x4c\xcc\x2c\xac\x6c\xec\x1c\x9c\x5c\xdc\x3c\xbc\x7c\xfc" + + "\x02\x82\x42\xc2\x22\xa2\x62\xe2\x12\x92\x52\xd2\x32\xb2\x72\xf2" + + "\x0a\x8a\x4a\xca\x2a\xaa\x6a\xea\x1a\x9a\x5a\xda\x3a\xba\x7a\xfa" + + "\x06\x86\x46\xc6\x26\xa6\x66\xe6\x16\x96\x56\xd6\x36\xb6\x76\xf6" + + "\x0e\x8e\x4e\xce\x2e\xae\x6e\xee\x1e\x9e\x5e\xde\x3e\xbe\x7e\xfe" + + "\x01\x81\x41\xc1\x21\xa1\x61\xe1\x11\x91\x51\xd1\x31\xb1\x71\xf1" + + "\x09\x89\x49\xc9\x29\xa9\x69\xe9\x19\x99\x59\xd9\x39\xb9\x79\xf9" + + "\x05\x85\x45\xc5\x25\xa5\x65\xe5\x15\x95\x55\xd5\x35\xb5\x75\xf5" + + "\x0d\x8d\x4d\xcd\x2d\xad\x6d\xed\x1d\x9d\x5d\xdd\x3d\xbd\x7d\xfd" + + "\x03\x83\x43\xc3\x23\xa3\x63\xe3\x13\x93\x53\xd3\x33\xb3\x73\xf3" + + "\x0b\x8b\x4b\xcb\x2b\xab\x6b\xeb\x1b\x9b\x5b\xdb\x3b\xbb\x7b\xfb" + + "\x07\x87\x47\xc7\x27\xa7\x67\xe7\x17\x97\x57\xd7\x37\xb7\x77\xf7" + + "\x0f\x8f\x4f\xcf\x2f\xaf\x6f\xef\x1f\x9f\x5f\xdf\x3f\xbf\x7f\xff" -var len8tab = [256]uint8{ - 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, - 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, - 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, -} +const len8tab = "" + + "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" + + "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" + + "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" + + "\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" + + "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" diff --git a/libgo/go/math/bits/make_examples.go b/libgo/go/math/bits/make_examples.go index 1d3ad53..ac4004d 100644 --- a/libgo/go/math/bits/make_examples.go +++ b/libgo/go/math/bits/make_examples.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program generates example_test.go. diff --git a/libgo/go/math/bits/make_tables.go b/libgo/go/math/bits/make_tables.go index b068d5e..867025e 100644 --- a/libgo/go/math/bits/make_tables.go +++ b/libgo/go/math/bits/make_tables.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program generates bits_tables.go. @@ -47,16 +48,15 @@ func main() { } func gen(w io.Writer, name string, f func(uint8) uint8) { - fmt.Fprintf(w, "var %s = [256]uint8{", name) + // Use a const string to allow the compiler to constant-evaluate lookups at constant index. + fmt.Fprintf(w, "const %s = \"\"+\n\"", name) for i := 0; i < 256; i++ { - if i%16 == 0 { - fmt.Fprint(w, "\n\t") - } else { - fmt.Fprint(w, " ") + fmt.Fprintf(w, "\\x%02x", f(uint8(i))) + if i%16 == 15 && i != 255 { + fmt.Fprint(w, "\"+\n\"") } - fmt.Fprintf(w, "%#02x,", f(uint8(i))) } - fmt.Fprint(w, "\n}\n\n") + fmt.Fprint(w, "\"\n\n") } func ntz8(x uint8) (n uint8) { diff --git a/libgo/go/math/cmplx/huge_test.go b/libgo/go/math/cmplx/huge_test.go index f8e60c2..78b4231 100644 --- a/libgo/go/math/cmplx/huge_test.go +++ b/libgo/go/math/cmplx/huge_test.go @@ -5,6 +5,7 @@ // Disabled for s390x because it uses assembly routines that are not // accurate for huge arguments. +//go:build !s390x // +build !s390x package cmplx diff --git a/libgo/go/math/const.go b/libgo/go/math/const.go index 0fc8715..5ea935f 100644 --- a/libgo/go/math/const.go +++ b/libgo/go/math/const.go @@ -28,15 +28,19 @@ const ( // Max is the largest finite value representable by the type. // SmallestNonzero is the smallest positive, non-zero value representable by the type. const ( - MaxFloat32 = 3.40282346638528859811704183484516925440e+38 // 2**127 * (2**24 - 1) / 2**23 - SmallestNonzeroFloat32 = 1.401298464324817070923729583289916131280e-45 // 1 / 2**(127 - 1 + 23) + MaxFloat32 = 0x1p127 * (1 + (1 - 0x1p-23)) // 3.40282346638528859811704183484516925440e+38 + SmallestNonzeroFloat32 = 0x1p-126 * 0x1p-23 // 1.401298464324817070923729583289916131280e-45 - MaxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52 - SmallestNonzeroFloat64 = 4.940656458412465441765687928682213723651e-324 // 1 / 2**(1023 - 1 + 52) + MaxFloat64 = 0x1p1023 * (1 + (1 - 0x1p-52)) // 1.79769313486231570814527423731704356798070e+308 + SmallestNonzeroFloat64 = 0x1p-1022 * 0x1p-52 // 4.9406564584124654417656879286822137236505980e-324 ) // Integer limit values. const ( + intSize = 32 << (^uint(0) >> 63) // 32 or 64 + + MaxInt = 1<<(intSize-1) - 1 + MinInt = -1 << (intSize - 1) MaxInt8 = 1<<7 - 1 MinInt8 = -1 << 7 MaxInt16 = 1<<15 - 1 @@ -45,6 +49,7 @@ const ( MinInt32 = -1 << 31 MaxInt64 = 1<<63 - 1 MinInt64 = -1 << 63 + MaxUint = 1<<intSize - 1 MaxUint8 = 1<<8 - 1 MaxUint16 = 1<<16 - 1 MaxUint32 = 1<<32 - 1 diff --git a/libgo/go/math/const_test.go b/libgo/go/math/const_test.go new file mode 100644 index 0000000..170ba6a --- /dev/null +++ b/libgo/go/math/const_test.go @@ -0,0 +1,47 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math_test + +import ( + "testing" + + . "math" +) + +func TestMaxUint(t *testing.T) { + if v := uint(MaxUint); v+1 != 0 { + t.Errorf("MaxUint should wrap around to zero: %d", v+1) + } + if v := uint8(MaxUint8); v+1 != 0 { + t.Errorf("MaxUint8 should wrap around to zero: %d", v+1) + } + if v := uint16(MaxUint16); v+1 != 0 { + t.Errorf("MaxUint16 should wrap around to zero: %d", v+1) + } + if v := uint32(MaxUint32); v+1 != 0 { + t.Errorf("MaxUint32 should wrap around to zero: %d", v+1) + } + if v := uint64(MaxUint64); v+1 != 0 { + t.Errorf("MaxUint64 should wrap around to zero: %d", v+1) + } +} + +func TestMaxInt(t *testing.T) { + if v := int(MaxInt); v+1 != MinInt { + t.Errorf("MaxInt should wrap around to MinInt: %d", v+1) + } + if v := int8(MaxInt8); v+1 != MinInt8 { + t.Errorf("MaxInt8 should wrap around to MinInt8: %d", v+1) + } + if v := int16(MaxInt16); v+1 != MinInt16 { + t.Errorf("MaxInt16 should wrap around to MinInt16: %d", v+1) + } + if v := int32(MaxInt32); v+1 != MinInt32 { + t.Errorf("MaxInt32 should wrap around to MinInt32: %d", v+1) + } + if v := int64(MaxInt64); v+1 != MinInt64 { + t.Errorf("MaxInt64 should wrap around to MinInt64: %d", v+1) + } +} diff --git a/libgo/go/math/dim.go b/libgo/go/math/dim.go index cf06f30..6a857bb 100644 --- a/libgo/go/math/dim.go +++ b/libgo/go/math/dim.go @@ -33,6 +33,9 @@ func Dim(x, y float64) float64 { // Max(+0, ±0) = Max(±0, +0) = +0 // Max(-0, -0) = -0 func Max(x, y float64) float64 { + if haveArchMax { + return archMax(x, y) + } return max(x, y) } @@ -62,6 +65,9 @@ func max(x, y float64) float64 { // Min(x, NaN) = Min(NaN, x) = NaN // Min(-0, ±0) = Min(±0, -0) = -0 func Min(x, y float64) float64 { + if haveArchMin { + return archMin(x, y) + } return min(x, y) } diff --git a/libgo/go/math/dim_noasm.go b/libgo/go/math/dim_noasm.go new file mode 100644 index 0000000..9a052c0 --- /dev/null +++ b/libgo/go/math/dim_noasm.go @@ -0,0 +1,20 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !amd64 && !arm64 && !riscv64 && !s390x +// -build !amd64,!arm64,!riscv64,!s390x + +package math + +const haveArchMax = false + +func archMax(x, y float64) float64 { + panic("not implemented") +} + +const haveArchMin = false + +func archMin(x, y float64) float64 { + panic("not implemented") +} diff --git a/libgo/go/math/exp.go b/libgo/go/math/exp.go index f3d3cb6..7ab80b4 100644 --- a/libgo/go/math/exp.go +++ b/libgo/go/math/exp.go @@ -11,14 +11,13 @@ package math // Exp(NaN) = NaN // Very large values overflow to 0 or +Inf. // Very small values underflow to 1. - -//extern exp -func libc_exp(float64) float64 - func Exp(x float64) float64 { return libc_exp(x) } +//extern exp +func libc_exp(float64) float64 + // The original C code, the long comment, and the constants // below are from FreeBSD's /usr/src/lib/msun/src/e_exp.c // and came with this notice. The go code is a simplified @@ -139,6 +138,9 @@ func exp(x float64) float64 { // // Special cases are the same as Exp. func Exp2(x float64) float64 { + if haveArchExp2 { + return archExp2(x) + } return exp2(x) } diff --git a/libgo/go/math/exp2_noasm.go b/libgo/go/math/exp2_noasm.go new file mode 100644 index 0000000..9b81544 --- /dev/null +++ b/libgo/go/math/exp2_noasm.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !arm64 +// -build !arm64 + +package math + +const haveArchExp2 = false + +func archExp2(x float64) float64 { + panic("not implemented") +} diff --git a/libgo/go/math/exp_asm.go b/libgo/go/math/exp_amd64.go index 11e0a61..654ccce 100644 --- a/libgo/go/math/exp_asm.go +++ b/libgo/go/math/exp_amd64.go @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build amd64 // +build amd64 -// +build ignore package math diff --git a/libgo/go/math/exp_noasm.go b/libgo/go/math/exp_noasm.go new file mode 100644 index 0000000..4f90604 --- /dev/null +++ b/libgo/go/math/exp_noasm.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !amd64 && !arm64 && !s390x +// -build !amd64,!arm64,!s390x + +package math + +const haveArchExp = false + +func archExp(x float64) float64 { + panic("not implemented") +} diff --git a/libgo/go/math/expm1.go b/libgo/go/math/expm1.go index 9c6e080..bf685ae 100644 --- a/libgo/go/math/expm1.go +++ b/libgo/go/math/expm1.go @@ -121,10 +121,6 @@ package math // Expm1(-Inf) = -1 // Expm1(NaN) = NaN // Very large values overflow to -1 or +Inf. - -//extern expm1 -func libc_expm1(float64) float64 - func Expm1(x float64) float64 { if x == 0 { return x @@ -132,6 +128,9 @@ func Expm1(x float64) float64 { return libc_expm1(x) } +//extern expm1 +func libc_expm1(float64) float64 + func expm1(x float64) float64 { const ( Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF diff --git a/libgo/go/math/floor.go b/libgo/go/math/floor.go index 9115968..963c858 100644 --- a/libgo/go/math/floor.go +++ b/libgo/go/math/floor.go @@ -57,8 +57,10 @@ func ceil(x float64) float64 { // Trunc(±0) = ±0 // Trunc(±Inf) = ±Inf // Trunc(NaN) = NaN - func Trunc(x float64) float64 { + if haveArchTrunc { + return archTrunc(x) + } return trunc(x) } diff --git a/libgo/go/math/floor_noasm.go b/libgo/go/math/floor_noasm.go new file mode 100644 index 0000000..bcdc5b0 --- /dev/null +++ b/libgo/go/math/floor_noasm.go @@ -0,0 +1,26 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !386 && !amd64 && !arm64 && !ppc64 && !ppc64le && !s390x && !wasm +// -build !386,!amd64,!arm64,!ppc64,!ppc64le,!s390x,!wasm + +package math + +const haveArchFloor = false + +func archFloor(x float64) float64 { + panic("not implemented") +} + +const haveArchCeil = false + +func archCeil(x float64) float64 { + panic("not implemented") +} + +const haveArchTrunc = false + +func archTrunc(x float64) float64 { + panic("not implemented") +} diff --git a/libgo/go/math/fma.go b/libgo/go/math/fma.go index db78dfa..ca0bf99 100644 --- a/libgo/go/math/fma.go +++ b/libgo/go/math/fma.go @@ -128,7 +128,7 @@ func FMA(x, y, z float64) float64 { pe -= int32(is62zero) // Swap addition operands so |p| >= |z| - if pe < ze || (pe == ze && (pm1 < zm1 || (pm1 == zm1 && pm2 < zm2))) { + if pe < ze || pe == ze && pm1 < zm1 { ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2 } diff --git a/libgo/go/math/frexp.go b/libgo/go/math/frexp.go index 4ad0aee..3c8a909 100644 --- a/libgo/go/math/frexp.go +++ b/libgo/go/math/frexp.go @@ -14,6 +14,9 @@ package math // Frexp(±Inf) = ±Inf, 0 // Frexp(NaN) = NaN, 0 func Frexp(f float64) (frac float64, exp int) { + if haveArchFrexp { + return archFrexp(f) + } return frexp(f) } diff --git a/libgo/go/math/huge_test.go b/libgo/go/math/huge_test.go index 9448edc..ec81a4a 100644 --- a/libgo/go/math/huge_test.go +++ b/libgo/go/math/huge_test.go @@ -5,6 +5,7 @@ // Disabled for s390x because it uses assembly routines that are not // accurate for huge arguments. +//go:build !s390x // +build !s390x package math_test diff --git a/libgo/go/math/log.go b/libgo/go/math/log.go index cf242e8..f1c609d 100644 --- a/libgo/go/math/log.go +++ b/libgo/go/math/log.go @@ -77,14 +77,13 @@ package math // Log(0) = -Inf // Log(x < 0) = NaN // Log(NaN) = NaN - -//extern log -func libc_log(float64) float64 - func Log(x float64) float64 { return libc_log(x) } +//extern log +func libc_log(float64) float64 + func log(x float64) float64 { const ( Ln2Hi = 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */ diff --git a/libgo/go/math/mod.go b/libgo/go/math/mod.go index 436788f..e223c7a 100644 --- a/libgo/go/math/mod.go +++ b/libgo/go/math/mod.go @@ -18,14 +18,13 @@ package math // Mod(x, 0) = NaN // Mod(x, ±Inf) = x // Mod(x, NaN) = NaN - -//extern fmod -func libc_fmod(float64, float64) float64 - func Mod(x, y float64) float64 { return libc_fmod(x, y) } +//extern fmod +func libc_fmod(float64, float64) float64 + func mod(x, y float64) float64 { if y == 0 || IsInf(x, 0) || IsNaN(x) || IsNaN(y) { return NaN() diff --git a/libgo/go/math/modf.go b/libgo/go/math/modf.go index b59d4b7..bf08dc6 100644 --- a/libgo/go/math/modf.go +++ b/libgo/go/math/modf.go @@ -11,6 +11,9 @@ package math // Modf(±Inf) = ±Inf, NaN // Modf(NaN) = NaN, NaN func Modf(f float64) (int float64, frac float64) { + if haveArchModf { + return archModf(f) + } return modf(f) } diff --git a/libgo/go/math/modf_noasm.go b/libgo/go/math/modf_noasm.go new file mode 100644 index 0000000..42d4734 --- /dev/null +++ b/libgo/go/math/modf_noasm.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !arm64 && !ppc64 && !ppc64le +// -build !arm64,!ppc64,!ppc64le + +package math + +const haveArchModf = false + +func archModf(f float64) (int float64, frac float64) { + panic("not implemented") +} diff --git a/libgo/go/math/rand/export_test.go b/libgo/go/math/rand/export_test.go new file mode 100644 index 0000000..560010b --- /dev/null +++ b/libgo/go/math/rand/export_test.go @@ -0,0 +1,17 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +func Int31nForTest(r *Rand, n int32) int32 { + return r.int31n(n) +} + +func GetNormalDistributionParameters() (float64, [128]uint32, [128]float32, [128]float32) { + return rn, kn, wn, fn +} + +func GetExponentialDistributionParameters() (float64, [256]uint32, [256]float32, [256]float32) { + return re, ke, we, fe +} diff --git a/libgo/go/math/rand/gen_cooked.go b/libgo/go/math/rand/gen_cooked.go index 0afc10d..7950e09 100644 --- a/libgo/go/math/rand/gen_cooked.go +++ b/libgo/go/math/rand/gen_cooked.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build ignore // +build ignore // This program computes the value of rngCooked in rng.go, diff --git a/libgo/go/math/rand/race_test.go b/libgo/go/math/rand/race_test.go index 186c716..e7d1036 100644 --- a/libgo/go/math/rand/race_test.go +++ b/libgo/go/math/rand/race_test.go @@ -2,9 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package rand +package rand_test import ( + . "math/rand" "sync" "testing" ) diff --git a/libgo/go/math/rand/rand.go b/libgo/go/math/rand/rand.go index d6422c9..13f20ca 100644 --- a/libgo/go/math/rand/rand.go +++ b/libgo/go/math/rand/rand.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package rand implements pseudo-random number generators. +// Package rand implements pseudo-random number generators unsuitable for +// security-sensitive work. // // Random numbers are generated by a Source. Top-level functions, such as // Float64 and Int, use a default shared Source that produces a deterministic @@ -11,11 +12,9 @@ // The default Source is safe for concurrent use by multiple goroutines, but // Sources created by NewSource are not. // -// Mathematical interval notation such as [0, n) is used throughout the -// documentation for this package. -// -// For random numbers suitable for security-sensitive work, see the crypto/rand -// package. +// This package's outputs might be easily predictable regardless of how it's +// seeded. For random numbers suitable for security-sensitive work, see the +// crypto/rand package. package rand import "sync" @@ -104,7 +103,7 @@ func (r *Rand) Int() int { return int(u << 1 >> 1) // clear sign bit if int == int32 } -// Int63n returns, as an int64, a non-negative pseudo-random number in [0,n). +// Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n). // It panics if n <= 0. func (r *Rand) Int63n(n int64) int64 { if n <= 0 { @@ -121,7 +120,7 @@ func (r *Rand) Int63n(n int64) int64 { return v % n } -// Int31n returns, as an int32, a non-negative pseudo-random number in [0,n). +// Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n). // It panics if n <= 0. func (r *Rand) Int31n(n int32) int32 { if n <= 0 { @@ -138,7 +137,7 @@ func (r *Rand) Int31n(n int32) int32 { return v % n } -// int31n returns, as an int32, a non-negative pseudo-random number in [0,n). +// int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n). // n must be > 0, but int31n does not check this; the caller must ensure it. // int31n exists because Int31n is inefficient, but Go 1 compatibility // requires that the stream of values produced by math/rand remain unchanged. @@ -162,7 +161,7 @@ func (r *Rand) int31n(n int32) int32 { return int32(prod >> 32) } -// Intn returns, as an int, a non-negative pseudo-random number in [0,n). +// Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n). // It panics if n <= 0. func (r *Rand) Intn(n int) int { if n <= 0 { @@ -174,7 +173,7 @@ func (r *Rand) Intn(n int) int { return int(r.Int63n(int64(n))) } -// Float64 returns, as a float64, a pseudo-random number in [0.0,1.0). +// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0). func (r *Rand) Float64() float64 { // A clearer, simpler implementation would be: // return float64(r.Int63n(1<<53)) / (1<<53) @@ -200,7 +199,7 @@ again: return f } -// Float32 returns, as a float32, a pseudo-random number in [0.0,1.0). +// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0). func (r *Rand) Float32() float32 { // Same rationale as in Float64: we want to preserve the Go 1 value // stream except we want to fix it not to return 1.0 @@ -213,7 +212,8 @@ again: return f } -// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers [0,n). +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers +// in the half-open interval [0,n). func (r *Rand) Perm(n int) []int { m := make([]int, n) // In the following loop, the iteration when i=0 always swaps m[0] with m[0]. @@ -321,31 +321,31 @@ func Int31() int32 { return globalRand.Int31() } // Int returns a non-negative pseudo-random int from the default Source. func Int() int { return globalRand.Int() } -// Int63n returns, as an int64, a non-negative pseudo-random number in [0,n) +// Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. func Int63n(n int64) int64 { return globalRand.Int63n(n) } -// Int31n returns, as an int32, a non-negative pseudo-random number in [0,n) +// Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. func Int31n(n int32) int32 { return globalRand.Int31n(n) } -// Intn returns, as an int, a non-negative pseudo-random number in [0,n) +// Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. func Intn(n int) int { return globalRand.Intn(n) } -// Float64 returns, as a float64, a pseudo-random number in [0.0,1.0) +// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0) // from the default Source. func Float64() float64 { return globalRand.Float64() } -// Float32 returns, as a float32, a pseudo-random number in [0.0,1.0) +// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0) // from the default Source. func Float32() float32 { return globalRand.Float32() } -// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers [0,n) -// from the default Source. +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers +// in the half-open interval [0,n) from the default Source. func Perm(n int) []int { return globalRand.Perm(n) } // Shuffle pseudo-randomizes the order of elements using the default Source. diff --git a/libgo/go/math/rand/rand_test.go b/libgo/go/math/rand/rand_test.go index e037aae..462de8b 100644 --- a/libgo/go/math/rand/rand_test.go +++ b/libgo/go/math/rand/rand_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package rand +package rand_test import ( "bytes" @@ -11,6 +11,7 @@ import ( "internal/testenv" "io" "math" + . "math/rand" "os" "runtime" "testing" @@ -21,6 +22,9 @@ const ( numTestSamples = 10000 ) +var rn, kn, wn, fn = GetNormalDistributionParameters() +var re, ke, we, fe = GetExponentialDistributionParameters() + type statsResults struct { mean float64 stddev float64 @@ -503,7 +507,7 @@ func TestUniformFactorial(t *testing.T) { fn func() int }{ {name: "Int31n", fn: func() int { return int(r.Int31n(int32(nfact))) }}, - {name: "int31n", fn: func() int { return int(r.int31n(int32(nfact))) }}, + {name: "int31n", fn: func() int { return int(Int31nForTest(r, int32(nfact))) }}, {name: "Perm", fn: func() int { return encodePerm(r.Perm(n)) }}, {name: "Shuffle", fn: func() int { // Generate permutation using Shuffle. diff --git a/libgo/go/math/remainder.go b/libgo/go/math/remainder.go index 0e2903e..bf8bfd5 100644 --- a/libgo/go/math/remainder.go +++ b/libgo/go/math/remainder.go @@ -35,6 +35,9 @@ package math // Remainder(x, ±Inf) = x // Remainder(x, NaN) = NaN func Remainder(x, y float64) float64 { + if haveArchRemainder { + return archRemainder(x, y) + } return remainder(x, y) } diff --git a/libgo/go/math/sin.go b/libgo/go/math/sin.go index 03fd14c..bd8b9a7 100644 --- a/libgo/go/math/sin.go +++ b/libgo/go/math/sin.go @@ -114,14 +114,13 @@ var _cos = [...]float64{ // Special cases are: // Cos(±Inf) = NaN // Cos(NaN) = NaN - -//extern cos -func libc_cos(float64) float64 - func Cos(x float64) float64 { return libc_cos(x) } +//extern cos +func libc_cos(float64) float64 + func cos(x float64) float64 { const ( PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts @@ -181,14 +180,13 @@ func cos(x float64) float64 { // Sin(±0) = ±0 // Sin(±Inf) = NaN // Sin(NaN) = NaN - -//extern sin -func libc_sin(float64) float64 - func Sin(x float64) float64 { return libc_sin(x) } +//extern sin +func libc_sin(float64) float64 + func sin(x float64) float64 { const ( PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts diff --git a/libgo/go/math/sinh.go b/libgo/go/math/sinh.go index 993424d..9fe9b4e 100644 --- a/libgo/go/math/sinh.go +++ b/libgo/go/math/sinh.go @@ -23,6 +23,13 @@ package math // Sinh(±Inf) = ±Inf // Sinh(NaN) = NaN func Sinh(x float64) float64 { + if haveArchSinh { + return archSinh(x) + } + return sinh(x) +} + +func sinh(x float64) float64 { // The coefficients are #2029 from Hart & Cheney. (20.36D) const ( P0 = -0.6307673640497716991184787251e+6 @@ -68,6 +75,13 @@ func Sinh(x float64) float64 { // Cosh(±Inf) = +Inf // Cosh(NaN) = NaN func Cosh(x float64) float64 { + if haveArchCosh { + return archCosh(x) + } + return cosh(x) +} + +func cosh(x float64) float64 { x = Abs(x) if x > 21 { return Exp(x) * 0.5 diff --git a/libgo/go/math/stubs.go b/libgo/go/math/stubs.go new file mode 100644 index 0000000..fe56800 --- /dev/null +++ b/libgo/go/math/stubs.go @@ -0,0 +1,161 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//-go:build !s390x +// -build !s390x + +// This is a large group of functions that most architectures don't +// implement in assembly. + +package math + +const haveArchAcos = false + +func archAcos(x float64) float64 { + panic("not implemented") +} + +const haveArchAcosh = false + +func archAcosh(x float64) float64 { + panic("not implemented") +} + +const haveArchAsin = false + +func archAsin(x float64) float64 { + panic("not implemented") +} + +const haveArchAsinh = false + +func archAsinh(x float64) float64 { + panic("not implemented") +} + +const haveArchAtan = false + +func archAtan(x float64) float64 { + panic("not implemented") +} + +const haveArchAtan2 = false + +func archAtan2(y, x float64) float64 { + panic("not implemented") +} + +const haveArchAtanh = false + +func archAtanh(x float64) float64 { + panic("not implemented") +} + +const haveArchCbrt = false + +func archCbrt(x float64) float64 { + panic("not implemented") +} + +const haveArchCos = false + +func archCos(x float64) float64 { + panic("not implemented") +} + +const haveArchCosh = false + +func archCosh(x float64) float64 { + panic("not implemented") +} + +const haveArchErf = false + +func archErf(x float64) float64 { + panic("not implemented") +} + +const haveArchErfc = false + +func archErfc(x float64) float64 { + panic("not implemented") +} + +const haveArchExpm1 = false + +func archExpm1(x float64) float64 { + panic("not implemented") +} + +const haveArchFrexp = false + +func archFrexp(x float64) (float64, int) { + panic("not implemented") +} + +const haveArchLdexp = false + +func archLdexp(frac float64, exp int) float64 { + panic("not implemented") +} + +const haveArchLog10 = false + +func archLog10(x float64) float64 { + panic("not implemented") +} + +const haveArchLog2 = false + +func archLog2(x float64) float64 { + panic("not implemented") +} + +const haveArchLog1p = false + +func archLog1p(x float64) float64 { + panic("not implemented") +} + +const haveArchMod = false + +func archMod(x, y float64) float64 { + panic("not implemented") +} + +const haveArchPow = false + +func archPow(x, y float64) float64 { + panic("not implemented") +} + +const haveArchRemainder = false + +func archRemainder(x, y float64) float64 { + panic("not implemented") +} + +const haveArchSin = false + +func archSin(x float64) float64 { + panic("not implemented") +} + +const haveArchSinh = false + +func archSinh(x float64) float64 { + panic("not implemented") +} + +const haveArchTan = false + +func archTan(x float64) float64 { + panic("not implemented") +} + +const haveArchTanh = false + +func archTanh(x float64) float64 { + panic("not implemented") +} diff --git a/libgo/go/math/tan.go b/libgo/go/math/tan.go index ca91d71..eea29d3 100644 --- a/libgo/go/math/tan.go +++ b/libgo/go/math/tan.go @@ -79,14 +79,13 @@ var _tanQ = [...]float64{ // Tan(±0) = ±0 // Tan(±Inf) = NaN // Tan(NaN) = NaN - -//extern tan -func libc_tan(float64) float64 - func Tan(x float64) float64 { return libc_tan(x) } +//extern tan +func libc_tan(float64) float64 + func tan(x float64) float64 { const ( PI4A = 7.85398125648498535156e-1 // 0x3fe921fb40000000, Pi/4 split into three parts diff --git a/libgo/go/math/tanh.go b/libgo/go/math/tanh.go index 009a206..a825678 100644 --- a/libgo/go/math/tanh.go +++ b/libgo/go/math/tanh.go @@ -72,6 +72,13 @@ var tanhQ = [...]float64{ // Tanh(±Inf) = ±1 // Tanh(NaN) = NaN func Tanh(x float64) float64 { + if haveArchTanh { + return archTanh(x) + } + return tanh(x) +} + +func tanh(x float64) float64 { const MAXLOG = 8.8029691931113054295988e+01 // log(2**127) z := Abs(x) switch { |