diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-02-13 14:16:23 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-02-13 14:16:23 +0530 |
commit | 909279a5cfa938c989c9b01c8f48a0276291ec45 (patch) | |
tree | e3b9264b50baf0e4ca9d563f3acbdc56ff2870a4 | |
parent | bdf028142eb77d6ae59500db875068fa5d7b059d (diff) | |
download | glibc-909279a5cfa938c989c9b01c8f48a0276291ec45.zip glibc-909279a5cfa938c989c9b01c8f48a0276291ec45.tar.gz glibc-909279a5cfa938c989c9b01c8f48a0276291ec45.tar.bz2 |
Optimized mp multiplication
Don't bother multiplying zeroes since that only wastes cycles.
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/mpa.c | 56 |
2 files changed, 51 insertions, 8 deletions
@@ -1,5 +1,8 @@ 2013-02-13 Siddhesh Poyarekar <siddhesh@redhat.com> + * sysdeps/ieee754/dbl-64/mpa.c (__mul): Don't bother with zero + values in the mantissa. + * sysdeps/ieee754/dbl-64/mpa.c (add_magnitudes): Use ZK to minimize writes to Z. (sub_magnitudes): Simplify code a bit. diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c index 005aa0c..5b50b0d 100644 --- a/sysdeps/ieee754/dbl-64/mpa.c +++ b/sysdeps/ieee754/dbl-64/mpa.c @@ -610,7 +610,7 @@ void SECTION __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) { - int i, j, k, k2; + int i, j, k, ip, ip2; double u, zk; /* Is z=0? */ @@ -620,11 +620,51 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) return; } - /* Multiply, add and carry. */ - k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3; - zk = Z[k2] = ZERO; + /* We need not iterate through all X's and Y's since it's pointless to + multiply zeroes. Here, both are zero... */ + for (ip2 = p; ip2 > 0; ip2--) + if (X[ip2] != ZERO || Y[ip2] != ZERO) + break; - for (k = k2; k > p; k--) + /* ... and here, at least one of them is still zero. */ + for (ip = ip2; ip > 0; ip--) + if (X[ip] * Y[ip] != ZERO) + break; + + /* The product looks like this for p = 3 (as an example): + + + a1 a2 a3 + x b1 b2 b3 + ----------------------------- + a1*b3 a2*b3 a3*b3 + a1*b2 a2*b2 a3*b2 + a1*b1 a2*b1 a3*b1 + + So our K needs to ideally be P*2, but we're limiting ourselves to P + 3 + for P >= 3. We compute the above digits in two parts; the last P-1 + digits and then the first P digits. The last P-1 digits are a sum of + products of the input digits from P to P-k where K is 0 for the least + significant digit and increases as we go towards the left. The product + term is of the form X[k]*X[P-k] as can be seen in the above example. + + The first P digits are also a sum of products with the same product term, + except that the sum is from 1 to k. This is also evident from the above + example. + + Another thing that becomes evident is that only the most significant + ip+ip2 digits of the result are non-zero, where ip and ip2 are the + 'internal precision' of the input numbers, i.e. digits after ip and ip2 + are all ZERO. */ + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > ip + ip2 + 1) + Z[k--] = ZERO; + + zk = Z[k] = ZERO; + + while (k > p) { for (i = k - p, j = p; i < p + 1; i++, j--) zk += X[i] * Y[j]; @@ -632,10 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) u = (zk + CUTTER) - CUTTER; if (u > zk) u -= RADIX; - Z[k] = zk - u; + Z[k--] = zk - u; zk = u * RADIXI; } + /* The real deal. */ while (k > 1) { for (i = 1, j = k - 1; i < k; i++, j--) @@ -644,9 +685,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) u = (zk + CUTTER) - CUTTER; if (u > zk) u -= RADIX; - Z[k] = zk - u; + Z[k--] = zk - u; zk = u * RADIXI; - k--; } Z[k] = zk; |