aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-13 14:16:23 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-13 14:16:23 +0530
commit909279a5cfa938c989c9b01c8f48a0276291ec45 (patch)
treee3b9264b50baf0e4ca9d563f3acbdc56ff2870a4
parentbdf028142eb77d6ae59500db875068fa5d7b059d (diff)
downloadglibc-909279a5cfa938c989c9b01c8f48a0276291ec45.zip
glibc-909279a5cfa938c989c9b01c8f48a0276291ec45.tar.gz
glibc-909279a5cfa938c989c9b01c8f48a0276291ec45.tar.bz2
Optimized mp multiplication
Don't bother multiplying zeroes since that only wastes cycles.
-rw-r--r--ChangeLog3
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.c56
2 files changed, 51 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 9b224a4..743dfaf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2013-02-13 Siddhesh Poyarekar <siddhesh@redhat.com>
+ * sysdeps/ieee754/dbl-64/mpa.c (__mul): Don't bother with zero
+ values in the mantissa.
+
* sysdeps/ieee754/dbl-64/mpa.c (add_magnitudes): Use ZK to
minimize writes to Z.
(sub_magnitudes): Simplify code a bit.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 005aa0c..5b50b0d 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -610,7 +610,7 @@ void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k, k2;
+ int i, j, k, ip, ip2;
double u, zk;
/* Is z=0? */
@@ -620,11 +620,51 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
return;
}
- /* Multiply, add and carry. */
- k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
- zk = Z[k2] = ZERO;
+ /* We need not iterate through all X's and Y's since it's pointless to
+ multiply zeroes. Here, both are zero... */
+ for (ip2 = p; ip2 > 0; ip2--)
+ if (X[ip2] != ZERO || Y[ip2] != ZERO)
+ break;
- for (k = k2; k > p; k--)
+ /* ... and here, at least one of them is still zero. */
+ for (ip = ip2; ip > 0; ip--)
+ if (X[ip] * Y[ip] != ZERO)
+ break;
+
+ /* The product looks like this for p = 3 (as an example):
+
+
+ a1 a2 a3
+ x b1 b2 b3
+ -----------------------------
+ a1*b3 a2*b3 a3*b3
+ a1*b2 a2*b2 a3*b2
+ a1*b1 a2*b1 a3*b1
+
+ So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
+ for P >= 3. We compute the above digits in two parts; the last P-1
+ digits and then the first P digits. The last P-1 digits are a sum of
+ products of the input digits from P to P-k where K is 0 for the least
+ significant digit and increases as we go towards the left. The product
+ term is of the form X[k]*X[P-k] as can be seen in the above example.
+
+ The first P digits are also a sum of products with the same product term,
+ except that the sum is from 1 to k. This is also evident from the above
+ example.
+
+ Another thing that becomes evident is that only the most significant
+ ip+ip2 digits of the result are non-zero, where ip and ip2 are the
+ 'internal precision' of the input numbers, i.e. digits after ip and ip2
+ are all ZERO. */
+
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+ while (k > ip + ip2 + 1)
+ Z[k--] = ZERO;
+
+ zk = Z[k] = ZERO;
+
+ while (k > p)
{
for (i = k - p, j = p; i < p + 1; i++, j--)
zk += X[i] * Y[j];
@@ -632,10 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
- Z[k] = zk - u;
+ Z[k--] = zk - u;
zk = u * RADIXI;
}
+ /* The real deal. */
while (k > 1)
{
for (i = 1, j = k - 1; i < k; i++, j--)
@@ -644,9 +685,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
- Z[k] = zk - u;
+ Z[k--] = zk - u;
zk = u * RADIXI;
- k--;
}
Z[k] = zk;