Commit acfe3a59 authored by Robert Griesemer's avatar Robert Griesemer

math/big: API cleanup

- better and more consistent documentation
- more functions implemented
- more tests

Change-Id: If4c591e7af4ec5434fbb411a48dd0f8add993720
Reviewed-on: https://go-review.googlesource.com/4140Reviewed-by: default avatarAlan Donovan <adonovan@google.com>
parent afac4f0a
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
// rounding mode of the result operand determines the rounding // rounding mode of the result operand determines the rounding
// mode of an operation. This is a from-scratch implementation. // mode of an operation. This is a from-scratch implementation.
// CAUTION: WORK IN PROGRESS - ANY ASPECT OF THIS IMPLEMENTATION MAY CHANGE! // CAUTION: WORK IN PROGRESS - USE AT YOUR OWN RISK.
package big package big
...@@ -20,42 +20,36 @@ import ( ...@@ -20,42 +20,36 @@ import (
const debugFloat = true // enable for debugging const debugFloat = true // enable for debugging
// Internal representation: A floating-point value x != 0 consists // A Float represents a multi-precision floating point number of the form
// of a sign (x.neg), mantissa (x.mant), and exponent (x.exp) such
// that
// //
// x = sign * 0.mantissa * 2**exponent // sign * mantissa * 2**exponent
//
// and the mantissa is interpreted as a value between 0.5 and 1:
//
// 0.5 <= mantissa < 1.0
// //
// The mantissa bits are stored in the shortest nat slice long enough // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp (with the
// to hold x.prec mantissa bits. The mantissa is normalized such that // exception of 0 and Inf which have a 0 mantissa and special exponents).
// the msb of x.mant == 1. Thus, if the precision is not a multiple of
// the Word size _W, x.mant[0] contains trailing zero bits. The number
// 0 is represented by an empty mantissa and a zero exponent.
// A Float represents a multi-precision floating point number
// of the form
// //
// sign * mantissa * 2**exponent // Each Float value also has a precision, rounding mode, and accuracy.
// //
// Each value also has a precision, rounding mode, and accuracy value.
// The precision is the number of mantissa bits used to represent the // The precision is the number of mantissa bits used to represent the
// value, and the result of an operation is rounded to that many bits // value. The rounding mode specifies how a result should be rounded
// according to the value's rounding mode (unless specified otherwise). // to fit into the mantissa bits, and accuracy describes the rounding
// The accuracy value indicates the rounding error with respect to the // error with respect to the exact result.
// exact (not rounded) value.
// //
// The zero (uninitialized) value for a Float is ready to use and // All operations, including setters, that specify a *Float for the result,
// represents the number 0.0 of 0 bit precision. // usually via the receiver, round their result to the result's precision
// and according to its rounding mode, unless specified otherwise. If the
// result precision is 0 (see below), it is set to the precision of the
// argument with the largest precision value before any rounding takes
// place.
// TODO(gri) should the rounding mode also be copied in this case?
// //
// By setting the desired precision to 24 (or 53) and using ToNearestEven // By setting the desired precision to 24 or 53 and using ToNearestEven
// rounding, Float arithmetic operations emulate the corresponding float32 // rounding, Float operations produce the same results as the corresponding
// or float64 IEEE-754 operations (except for denormalized numbers and NaNs). // float32 or float64 IEEE-754 arithmetic for normalized operands (no NaNs
// or denormalized numbers). Additionally, positive and negative zeros and
// infinities are fully supported.
// //
// CAUTION: THIS IS WORK IN PROGRESS - USE AT YOUR OWN RISK. // The zero (uninitialized) value for a Float is ready to use and
// represents the number +0.0 of 0 bit precision.
// //
type Float struct { type Float struct {
mode RoundingMode mode RoundingMode
...@@ -66,12 +60,20 @@ type Float struct { ...@@ -66,12 +60,20 @@ type Float struct {
prec uint // TODO(gri) make this a 32bit field prec uint // TODO(gri) make this a 32bit field
} }
// Internal representation details: The mantissa bits x.mant of a Float x
// are stored in the shortest nat slice long enough to hold x.prec bits.
// Unless x is a zero or an infinity, x.mant is normalized such that the
// msb of x.mant == 1. Thus, if the precision is not a multiple of the
// the Word size _W, x.mant[0] contains trailing zero bits. Zero and Inf
// values have an empty mantissa and a 0 or infExp exponent, respectively.
// NewFloat returns a new Float with value x rounded // NewFloat returns a new Float with value x rounded
// to prec bits according to the given rounding mode. // to prec bits according to the given rounding mode.
// If prec == 0, the result has value 0.0 independent // If prec == 0, the result has value 0.0 independent
// of the value of x. // of the value of x.
// BUG(gri) For prec == 0 and x == Inf, the result // BUG(gri) For prec == 0 and x == Inf, the result
// should be Inf as well. // should be Inf as well.
// TODO(gri) rethink this signature.
func NewFloat(x float64, prec uint, mode RoundingMode) *Float { func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
var z Float var z Float
if prec > 0 { if prec > 0 {
...@@ -83,30 +85,17 @@ func NewFloat(x float64, prec uint, mode RoundingMode) *Float { ...@@ -83,30 +85,17 @@ func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
return &z return &z
} }
// Special exponent values.
const ( const (
maxExp = math.MaxInt32 MaxExp = math.MaxInt32 // largest supported exponent magnitude
infExp = -maxExp - 1 // exponent value for Inf values infExp = -MaxExp - 1 // exponent for Inf values
) )
// NewInf returns a new Float with value positive infinity (sign >= 0), // NewInf returns a new infinite Float value with value +Inf (sign >= 0),
// or negative infinity (sign < 0). // or -Inf (sign < 0).
func NewInf(sign int) *Float { func NewInf(sign int) *Float {
return &Float{neg: sign < 0, exp: infExp} return &Float{neg: sign < 0, exp: infExp}
} }
// setExp sets the exponent for z.
// If the exponent is too small or too large, z becomes +/-Inf.
func (z *Float) setExp(e int64) {
if -maxExp <= e && e <= maxExp {
z.exp = int32(e)
return
}
// Inf
z.mant = z.mant[:0]
z.exp = infExp
}
// Accuracy describes the rounding error produced by the most recent // Accuracy describes the rounding error produced by the most recent
// operation that generated a Float value, relative to the exact value: // operation that generated a Float value, relative to the exact value:
// //
...@@ -191,11 +180,29 @@ func (x *Float) IsInf(sign int) bool { ...@@ -191,11 +180,29 @@ func (x *Float) IsInf(sign int) bool {
return x.exp == infExp && (sign == 0 || x.neg == (sign < 0)) return x.exp == infExp && (sign == 0 || x.neg == (sign < 0))
} }
// setExp sets the exponent for z.
// If the exponent's magnitude is too large, z becomes +/-Inf.
func (z *Float) setExp(e int64) {
if -MaxExp <= e && e <= MaxExp {
z.exp = int32(e)
return
}
// Inf
z.mant = z.mant[:0]
z.exp = infExp
}
// debugging support // debugging support
func (x *Float) validate() { func (x *Float) validate() {
// assumes x != 0 && x != Inf
const msb = 1 << (_W - 1) const msb = 1 << (_W - 1)
m := len(x.mant) m := len(x.mant)
if m == 0 {
// 0.0 or Inf
if x.exp != 0 && x.exp != infExp {
panic(fmt.Sprintf("empty matissa with invalid exponent %d", x.exp))
}
return
}
if x.mant[m-1]&msb == 0 { if x.mant[m-1]&msb == 0 {
panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0))) panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0)))
} }
...@@ -206,24 +213,24 @@ func (x *Float) validate() { ...@@ -206,24 +213,24 @@ func (x *Float) validate() {
// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
// sbit must be 0 or 1 and summarizes any "sticky bit" information one might // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
// have before calling round. z's mantissa must be normalized, with the msb set. // have before calling round. z's mantissa must be normalized (with the msb set)
// or empty.
func (z *Float) round(sbit uint) { func (z *Float) round(sbit uint) {
z.acc = Exact z.acc = Exact
// handle zero // handle zero and Inf
m := uint(len(z.mant)) // mantissa length in words for current precision m := uint(len(z.mant)) // mantissa length in words for current precision
if m == 0 { if m == 0 {
z.exp = 0 if z.exp != infExp {
z.exp = 0
}
return return
} }
// z.prec > 0
// handle Inf
// TODO(gri) handle Inf
if debugFloat { if debugFloat {
z.validate() z.validate()
} }
// z.prec > 0
bits := m * _W // available mantissa bits bits := m * _W // available mantissa bits
if bits == z.prec { if bits == z.prec {
...@@ -366,6 +373,8 @@ func (z *Float) round(sbit uint) { ...@@ -366,6 +373,8 @@ func (z *Float) round(sbit uint) {
} }
// Round sets z to the value of x rounded according to mode to prec bits and returns z. // Round sets z to the value of x rounded according to mode to prec bits and returns z.
// TODO(gri) rethink this signature.
// TODO(gri) adjust this to match precision semantics.
func (z *Float) Round(x *Float, prec uint, mode RoundingMode) *Float { func (z *Float) Round(x *Float, prec uint, mode RoundingMode) *Float {
z.Set(x) z.Set(x)
z.prec = prec z.prec = prec
...@@ -393,24 +402,33 @@ func nlz64(x uint64) uint { ...@@ -393,24 +402,33 @@ func nlz64(x uint64) uint {
panic("unreachable") panic("unreachable")
} }
// SetUint64 sets z to x and returns z. // SetUint64 sets z to the (possibly rounded) value of x and returns z.
// Precision is set to 64 bits. // If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetUint64(x uint64) *Float { func (z *Float) SetUint64(x uint64) *Float {
if z.prec == 0 {
z.prec = 64
}
z.acc = Exact
z.neg = false z.neg = false
z.prec = 64
if x == 0 { if x == 0 {
z.mant = z.mant[:0] z.mant = z.mant[:0]
z.exp = 0 z.exp = 0
return z return z
} }
// x != 0
s := nlz64(x) s := nlz64(x)
z.mant = z.mant.setUint64(x << s) z.mant = z.mant.setUint64(x << s)
z.exp = int32(64 - s) z.exp = int32(64 - s) // always fits
if z.prec < 64 {
z.round(0)
}
return z return z
} }
// SetInt64 sets z to x and returns z. // SetInt64 sets z to the (possibly rounded) value of x and returns z.
// Precision is set to 64 bits. // If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetInt64(x int64) *Float { func (z *Float) SetInt64(x int64) *Float {
u := x u := x
if u < 0 { if u < 0 {
...@@ -421,12 +439,17 @@ func (z *Float) SetInt64(x int64) *Float { ...@@ -421,12 +439,17 @@ func (z *Float) SetInt64(x int64) *Float {
return z return z
} }
// SetFloat64 sets z to x and returns z. // SetInt64 sets z to the (possibly rounded) value of x and returns z.
// Precision is set to 53 bits. // If z's precision is 0, it is changed to 53 (and rounding will have
// TODO(gri) test denormals, disallow NaN. // no effect).
// If x is denormalized or NaN, the result is unspecified.
// TODO(gri) should return nil in those cases
func (z *Float) SetFloat64(x float64) *Float { func (z *Float) SetFloat64(x float64) *Float {
z.neg = math.Signbit(x) // handle -0 correctly (-0 == 0) if z.prec == 0 {
z.prec = 53 z.prec = 53
}
z.acc = Exact
z.neg = math.Signbit(x) // handle -0 correctly
if math.IsInf(x, 0) { if math.IsInf(x, 0) {
z.mant = z.mant[:0] z.mant = z.mant[:0]
z.exp = infExp z.exp = infExp
...@@ -437,16 +460,19 @@ func (z *Float) SetFloat64(x float64) *Float { ...@@ -437,16 +460,19 @@ func (z *Float) SetFloat64(x float64) *Float {
z.exp = 0 z.exp = 0
return z return z
} }
// x != 0
fmant, exp := math.Frexp(x) // get normalized mantissa fmant, exp := math.Frexp(x) // get normalized mantissa
z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
z.exp = int32(exp) z.exp = int32(exp) // always fits
if z.prec < 53 {
z.round(0)
}
return z return z
} }
// fnorm normalizes mantissa m by shifting it to the left // fnorm normalizes mantissa m by shifting it to the left
// such that the msb of the most-significant word (msw) // such that the msb of the most-significant word (msw) is 1.
// is 1. It returns the shift amount. // It returns the shift amount. It assumes that len(m) != 0.
// It assumes that m is not the zero nat.
func fnorm(m nat) uint { func fnorm(m nat) uint {
if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
panic("msw of mantissa is 0") panic("msw of mantissa is 0")
...@@ -461,32 +487,52 @@ func fnorm(m nat) uint { ...@@ -461,32 +487,52 @@ func fnorm(m nat) uint {
return s return s
} }
// SetInt sets z to x and returns z. // SetInt sets z to the (possibly rounded) value of x and returns z.
// Precision is set to the number of bits required to represent x accurately. // If z's precision is 0, it is changed to x.BitLen() (and rounding will have
// TODO(gri) what about precision for x == 0? // no effect).
func (z *Float) SetInt(x *Int) *Float { func (z *Float) SetInt(x *Int) *Float {
// TODO(gri) can be more efficient if z.prec > 0
// but small compared to the size of x, or if there
// are many trailing 0's.
bits := uint(x.BitLen())
if z.prec == 0 {
z.prec = bits
}
z.acc = Exact
z.neg = x.neg
if len(x.abs) == 0 { if len(x.abs) == 0 {
z.neg = false
z.mant = z.mant[:0] z.mant = z.mant[:0]
z.exp = 0 z.exp = 0
// z.prec = ?
return z return z
} }
// x != 0 // x != 0
z.neg = x.neg
z.mant = z.mant.set(x.abs) z.mant = z.mant.set(x.abs)
e := uint(len(z.mant))*_W - fnorm(z.mant) fnorm(z.mant)
z.exp = int32(e) z.setExp(int64(bits))
z.prec = e if z.prec < bits {
z.round(0)
}
return z return z
} }
// SetRat sets z to x rounded to the precision of z and returns z. // SetRat sets z to the (possibly rounded) value of x and returns z.
func (z *Float) SetRat(x *Rat, prec uint) *Float { // If z's precision is 0, it is changed to the larger of a.BitLen()
panic("unimplemented") // and b.BitLen(), where a and b are the numerator and denominator
// of x, respectively (x = a/b).
func (z *Float) SetRat(x *Rat) *Float {
// TODO(gri) can be more efficient if x is an integer
var a, b Float
a.SetInt(x.Num())
b.SetInt(x.Denom())
if z.prec == 0 {
// TODO(gri) think about a.prec type to avoid excessive conversions
z.prec = uint(max(int(a.prec), int(b.prec)))
}
return z.Quo(&a, &b)
} }
// Set sets z to x, with the same precision as x, and returns z. // Set sets z to x, with the same precision as x, and returns z.
// TODO(gri) adjust this to match precision semantics.
func (z *Float) Set(x *Float) *Float { func (z *Float) Set(x *Float) *Float {
if z != x { if z != x {
z.neg = x.neg z.neg = x.neg
...@@ -584,7 +630,7 @@ func (x *Float) IsInt() bool { ...@@ -584,7 +630,7 @@ func (x *Float) IsInt() bool {
} }
// Abs sets z to |x| (the absolute value of x) and returns z. // Abs sets z to |x| (the absolute value of x) and returns z.
// TODO(gri) should Abs (and Neg) below ignore z's precision and rounding mode? // TODO(gri) adjust this to match precision semantics.
func (z *Float) Abs(x *Float) *Float { func (z *Float) Abs(x *Float) *Float {
z.Set(x) z.Set(x)
z.neg = false z.neg = false
...@@ -592,6 +638,7 @@ func (z *Float) Abs(x *Float) *Float { ...@@ -592,6 +638,7 @@ func (z *Float) Abs(x *Float) *Float {
} }
// Neg sets z to x with its sign negated, and returns z. // Neg sets z to x with its sign negated, and returns z.
// TODO(gri) adjust this to match precision semantics.
func (z *Float) Neg(x *Float) *Float { func (z *Float) Neg(x *Float) *Float {
z.Set(x) z.Set(x)
z.neg = !z.neg z.neg = !z.neg
...@@ -803,8 +850,8 @@ func (x *Float) ucmp(y *Float) int { ...@@ -803,8 +850,8 @@ func (x *Float) ucmp(y *Float) int {
// sign as x even when x is zero. // sign as x even when x is zero.
// Add sets z to the rounded sum x+y and returns z. // Add sets z to the rounded sum x+y and returns z.
// If z's precision is 0, it is set to the larger of // If z's precision is 0, it is changed to the larger
// x's or y's precision before the operation. // of x's or y's precision before the operation.
// Rounding is performed according to z's precision // Rounding is performed according to z's precision
// and rounding mode; and z's accuracy reports the // and rounding mode; and z's accuracy reports the
// result error relative to the exact (not rounded) // result error relative to the exact (not rounded)
...@@ -938,7 +985,7 @@ func (z *Float) Quo(x, y *Float) *Float { ...@@ -938,7 +985,7 @@ func (z *Float) Quo(x, y *Float) *Float {
} }
// Lsh sets z to the rounded x * (1<<s) and returns z. // Lsh sets z to the rounded x * (1<<s) and returns z.
// If z's precision is 0, it is set to x's precision. // If z's precision is 0, it is changed to x's precision.
// Rounding is performed according to z's precision // Rounding is performed according to z's precision
// and rounding mode; and z's accuracy reports the // and rounding mode; and z's accuracy reports the
// result error relative to the exact (not rounded) // result error relative to the exact (not rounded)
......
...@@ -262,9 +262,21 @@ func TestFloatSetUint64(t *testing.T) { ...@@ -262,9 +262,21 @@ func TestFloatSetUint64(t *testing.T) {
1 << 32, 1 << 32,
1<<64 - 1, 1<<64 - 1,
} { } {
f := new(Float).SetUint64(want) var f Float
f.SetUint64(want)
if got := f.Uint64(); got != want { if got := f.Uint64(); got != want {
t.Errorf("got %d (%s); want %d", got, f.Format('p', 0), want) t.Errorf("got %#x (%s); want %#x", got, f.Format('p', 0), want)
}
}
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x uint64 = 0x8765432187654321 // 64 bits needed
for prec := uint(1); prec <= 64; prec++ {
f := NewFloat(0, prec, ToZero).SetUint64(x)
got := f.Uint64()
want := x &^ (1<<(64-prec) - 1) // cut off (round to zero) low 64-prec bits
if got != want {
t.Errorf("got %#x (%s); want %#x", got, f.Format('p', 0), want)
} }
} }
} }
...@@ -284,12 +296,24 @@ func TestFloatSetInt64(t *testing.T) { ...@@ -284,12 +296,24 @@ func TestFloatSetInt64(t *testing.T) {
if i&1 != 0 { if i&1 != 0 {
want = -want want = -want
} }
f := new(Float).SetInt64(want) var f Float
f.SetInt64(want)
if got := f.Int64(); got != want { if got := f.Int64(); got != want {
t.Errorf("got %d (%s); want %d", got, f.Format('p', 0), want) t.Errorf("got %#x (%s); want %#x", got, f.Format('p', 0), want)
} }
} }
} }
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x int64 = 0x7654321076543210 // 63 bits needed
for prec := uint(1); prec <= 63; prec++ {
f := NewFloat(0, prec, ToZero).SetInt64(x)
got := f.Int64()
want := x &^ (1<<(63-prec) - 1) // cut off (round to zero) low 63-prec bits
if got != want {
t.Errorf("got %#x (%s); want %#x", got, f.Format('p', 0), want)
}
}
} }
func TestFloatSetFloat64(t *testing.T) { func TestFloatSetFloat64(t *testing.T) {
...@@ -311,16 +335,77 @@ func TestFloatSetFloat64(t *testing.T) { ...@@ -311,16 +335,77 @@ func TestFloatSetFloat64(t *testing.T) {
if i&1 != 0 { if i&1 != 0 {
want = -want want = -want
} }
f := new(Float).SetFloat64(want) var f Float
f.SetFloat64(want)
if got, _ := f.Float64(); got != want { if got, _ := f.Float64(); got != want {
t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want) t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want)
} }
} }
} }
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x uint64 = 0x8765432143218 // 53 bits needed
for prec := uint(1); prec <= 52; prec++ {
f := NewFloat(0, prec, ToZero).SetFloat64(float64(x))
got, _ := f.Float64()
want := float64(x &^ (1<<(52-prec) - 1)) // cut off (round to zero) low 53-prec bits
if got != want {
t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want)
}
}
} }
func TestFloatSetInt(t *testing.T) { func TestFloatSetInt(t *testing.T) {
// TODO(gri) implement for _, want := range []string{
"0",
"1",
"-1",
"1234567890",
"123456789012345678901234567890",
"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890",
} {
var x Int
_, ok := x.SetString(want, 0)
if !ok {
t.Errorf("invalid integer %s", want)
continue
}
var f Float
f.SetInt(&x)
got := f.Format('g', 100)
if got != want {
t.Errorf("got %s (%s); want %s", got, f.Format('p', 0), want)
}
}
// TODO(gri) test basic rounding behavior
}
func TestFloatSetRat(t *testing.T) {
for _, want := range []string{
"0",
"1",
"-1",
"1234567890",
"123456789012345678901234567890",
"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890",
"1.2",
"3.14159265",
// TODO(gri) expand
} {
var x Rat
_, ok := x.SetString(want)
if !ok {
t.Errorf("invalid fraction %s", want)
continue
}
f := NewFloat(0, 1000, 0) // set a high precision - TODO(gri) find a cleaner way
f.SetRat(&x)
got := f.Format('g', 100)
if got != want {
t.Errorf("got %s (%s); want %s", got, f.Format('p', 0), want)
}
}
} }
// Selected precisions with which to run various tests. // Selected precisions with which to run various tests.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment