17 files changed, 740 insertions, 122 deletions
diff --git a/ChangeLog b/ChangeLog
index 8463b90..f38e61d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,48 @@
+2007-03-27  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #3306]
+	* math/math_private.h (math_opt_barrier, math_force_eval): Define.
+	* sysdeps/i386/fpu/math_private.h: New file.
+	* sysdeps/x86_64/fpu/math_private.h: New file.
+	* math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and
+	math_force_eval macros.  Use "+m" constraint on asm rather than
+	"=m" and "m".
+	* math/s_nextafter.c (__nextafter): Likewise.
+	* sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward):
+	Likewise.
+	* sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise.
+	* sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise.
+	* sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise.
+	* sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use
+	math_opt_barrier and math_force_eval macros.
+	* sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise.
+	* sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise.
+	* sysdeps/i386/fpu/s_nexttoward.c: Include float.h.
+	(__nexttoward): Use math_opt_barrier and
+	math_force_eval macros.  Use "+m" constraint on asm rather than
+	"=m" and "m".  Only use asm to force double result if
+	FLT_EVAL_METHOD is 2.
+	* sysdeps/i386/fpu/s_nexttowardf.c: Include float.h.
+	(__nexttowardf): Use math_opt_barrier and
+	math_force_eval macros.  Use "+m" constraint on asm rather than
+	"=m" and "m".  Only use asm to force double result if
+	FLT_EVAL_METHOD is not 0.
+	* sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h.
+	(__nexttowardf): Use math_opt_barrier and
+	math_force_eval macros.  If FLT_EVAL_METHOD is not 0, force
+	x to float using asm.
+	* sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h.
+	(__nldbl_nexttowardf): Use math_opt_barrier and
+	math_force_eval macros.  If FLT_EVAL_METHOD is not 0, force
+	x to float using asm.
+	* sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h.
+	(__nexttowardf): Use math_opt_barrier and math_force_eval
+	macros.  If FLT_EVAL_METHOD is not 0, force x to float using asm.
+	* math/bug-nextafter.c (zero, inf): New variables.
+	(main): Add new tests.
+	* math/bug-nexttoward.c (zero, inf): New variables.
+	(main): Add new tests.
+
 2007-03-22  Jakub Jelinek  <jakub@redhat.com>
 
 	[BZ #3427]
diff --git a/math/bug-nextafter.c b/math/bug-nextafter.c
index 2a967c7..1d21841 100644
--- a/math/bug-nextafter.c
+++ b/math/bug-nextafter.c
@@ -4,6 +4,9 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+float zero = 0.0;
+float inf = INFINITY;
+
 int
 main (void)
 {
@@ -34,6 +37,81 @@ main (void)
       ++result;
     }
 
+  i = 0;
+  m = FLT_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  i = nextafterf (m, i);
+  if (i < 0 || i >= FLT_MIN)
+    {
+      puts ("nextafterf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf+ did not underflow");
+      ++result;
+    }
+  i = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  i = nextafterf (-m, -i);
+  if (i > 0 || i <= -FLT_MIN)
+    {
+      puts ("nextafterf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf- did not underflow");
+      ++result;
+    }
+  i = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  m = nextafterf (zero, inf);
+  if (m < 0.0 || m >= FLT_MIN)
+    {
+      puts ("nextafterf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterf (m, i) != 0.0)
+    {
+      puts ("nextafterf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  m = nextafterf (copysignf (zero, -1.0), -inf);
+  if (m > 0.0 || m <= -FLT_MIN)
+    {
+      puts ("nextafterf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterf (m, -i) != 0.0)
+    {
+      puts ("nextafterf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterf- did not underflow");
+      ++result;
+    }
+
   double di = INFINITY;
   double dm = DBL_MAX;
   feclearexcept (FE_ALL_EXCEPT);
@@ -59,5 +137,182 @@ main (void)
       ++result;
     }
 
+  di = 0;
+  dm = DBL_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  di = nextafter (dm, di);
+  if (di < 0 || di >= DBL_MIN)
+    {
+      puts ("nextafter+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter+ did not underflow");
+      ++result;
+    }
+  di = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  di = nextafter (-dm, -di);
+  if (di > 0 || di <= -DBL_MIN)
+    {
+      puts ("nextafter- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter- did not underflow");
+      ++result;
+    }
+  di = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  dm = nextafter (zero, inf);
+  if (dm < 0.0 || dm >= DBL_MIN)
+    {
+      puts ("nextafter+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafter (dm, di) != 0.0)
+    {
+      puts ("nextafter+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  dm = nextafter (copysign (zero, -1.0), -inf);
+  if (dm > 0.0 || dm <= -DBL_MIN)
+    {
+      puts ("nextafter- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafter (dm, -di) != 0.0)
+    {
+      puts ("nextafter- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafter- did not underflow");
+      ++result;
+    }
+
+#ifndef NO_LONG_DOUBLE
+  long double li = INFINITY;
+  long double lm = LDBL_MAX;
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterl (lm, li) != li)
+    {
+      puts ("nextafterl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_OVERFLOW) == 0)
+    {
+      puts ("nextafterl+ did not overflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterl (-lm, -li) != -li)
+    {
+      puts ("nextafterl failed");
+      ++result;
+    }
+  if (fetestexcept (FE_OVERFLOW) == 0)
+    {
+      puts ("nextafterl- did not overflow");
+      ++result;
+    }
+
+  li = 0;
+  lm = LDBL_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  li = nextafterl (lm, li);
+  if (li < 0 || li >= LDBL_MIN)
+    {
+      puts ("nextafterl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl+ did not underflow");
+      ++result;
+    }
+  li = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  li = nextafterl (-lm, -li);
+  if (li > 0 || li <= -LDBL_MIN)
+    {
+      puts ("nextafterl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl- did not underflow");
+      ++result;
+    }
+  li = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  lm = nextafterl (zero, inf);
+  if (lm < 0.0 || lm >= LDBL_MIN)
+    {
+      puts ("nextafterl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterl (lm, li) != 0.0)
+    {
+      puts ("nextafterl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  lm = nextafterl (copysign (zero, -1.0), -inf);
+  if (lm > 0.0 || lm <= -LDBL_MIN)
+    {
+      puts ("nextafterl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nextafterl (lm, -li) != 0.0)
+    {
+      puts ("nextafterl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nextafterl- did not underflow");
+      ++result;
+    }
+#endif
+
   return result;
 }
diff --git a/math/bug-nexttoward.c b/math/bug-nexttoward.c
index e306a12..ff57e5e 100644
--- a/math/bug-nexttoward.c
+++ b/math/bug-nexttoward.c
@@ -4,6 +4,9 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+float zero = 0.0;
+float inf = INFINITY;
+
 int
 main (void)
 {
@@ -35,6 +38,81 @@ main (void)
       ++result;
     }
 
+  fi = 0;
+  m = FLT_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  fi = nexttowardf (m, fi);
+  if (fi < 0 || fi >= FLT_MIN)
+    {
+      puts ("nexttowardf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf+ did not underflow");
+      ++result;
+    }
+  fi = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  fi = nexttowardf (-m, -fi);
+  if (fi > 0 || fi <= -FLT_MIN)
+    {
+      puts ("nexttowardf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf- did not underflow");
+      ++result;
+    }
+  fi = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  m = nexttowardf (zero, inf);
+  if (m < 0.0 || m >= FLT_MIN)
+    {
+      puts ("nexttowardf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardf (m, fi) != 0.0)
+    {
+      puts ("nexttowardf+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  m = nexttowardf (copysignf (zero, -1.0), -inf);
+  if (m > 0.0 || m <= -FLT_MIN)
+    {
+      puts ("nexttowardf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardf (m, -fi) != 0.0)
+    {
+      puts ("nexttowardf- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardf- did not underflow");
+      ++result;
+    }
+
   tl = (long double) DBL_MAX + 1.0e305L;
   double di = INFINITY;
   double dm = DBL_MAX;
@@ -61,5 +139,182 @@ main (void)
       ++result;
     }
 
+  di = 0;
+  dm = DBL_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  di = nexttoward (dm, di);
+  if (di < 0 || di >= DBL_MIN)
+    {
+      puts ("nexttoward+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward+ did not underflow");
+      ++result;
+    }
+  di = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  di = nexttoward (-dm, -di);
+  if (di > 0 || di <= -DBL_MIN)
+    {
+      puts ("nexttoward- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward- did not underflow");
+      ++result;
+    }
+  di = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  dm = nexttoward (zero, inf);
+  if (dm < 0.0 || dm >= DBL_MIN)
+    {
+      puts ("nexttoward+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttoward (dm, di) != 0.0)
+    {
+      puts ("nexttoward+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  dm = nexttoward (copysign (zero, -1.0), -inf);
+  if (dm > 0.0 || dm <= -DBL_MIN)
+    {
+      puts ("nexttoward- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttoward (dm, -di) != 0.0)
+    {
+      puts ("nexttoward- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttoward- did not underflow");
+      ++result;
+    }
+
+#ifndef NO_LONG_DOUBLE
+  long double li = INFINITY;
+  long double lm = LDBL_MAX;
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardl (lm, li) != li)
+    {
+      puts ("nexttowardl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_OVERFLOW) == 0)
+    {
+      puts ("nexttowardl+ did not overflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardl (-lm, -li) != -li)
+    {
+      puts ("nexttowardl failed");
+      ++result;
+    }
+  if (fetestexcept (FE_OVERFLOW) == 0)
+    {
+      puts ("nexttowardl- did not overflow");
+      ++result;
+    }
+
+  li = 0;
+  lm = LDBL_MIN;
+  feclearexcept (FE_ALL_EXCEPT);
+  li = nexttowardl (lm, li);
+  if (li < 0 || li >= LDBL_MIN)
+    {
+      puts ("nexttowardl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl+ did not underflow");
+      ++result;
+    }
+  li = 0;
+  feclearexcept (FE_ALL_EXCEPT);
+  li = nexttowardl (-lm, -li);
+  if (li > 0 || li <= -LDBL_MIN)
+    {
+      puts ("nexttowardl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl- did not underflow");
+      ++result;
+    }
+  li = -INFINITY;
+  feclearexcept (FE_ALL_EXCEPT);
+  lm = nexttowardl (zero, inf);
+  if (lm < 0.0 || lm >= LDBL_MIN)
+    {
+      puts ("nexttowardl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardl (lm, li) != 0.0)
+    {
+      puts ("nexttowardl+ failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl+ did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  lm = nexttowardl (copysign (zero, -1.0), -inf);
+  if (lm > 0.0 || lm <= -LDBL_MIN)
+    {
+      puts ("nexttowardl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl- did not underflow");
+      ++result;
+    }
+  feclearexcept (FE_ALL_EXCEPT);
+  if (nexttowardl (lm, -li) != 0.0)
+    {
+      puts ("nexttowardl- failed");
+      ++result;
+    }
+  if (fetestexcept (FE_UNDERFLOW) == 0)
+    {
+      puts ("nexttowardl- did not underflow");
+      ++result;
+    }
+#endif
+
   return result;
 }
diff --git a/math/math_private.h b/math/math_private.h
index 572e546..129646f 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -332,4 +332,10 @@ extern double __slowexp (double __x);
 extern double __slowpow (double __x, double __y, double __z);
 extern void __docos (double __x, double __dx, double __v[]);
 
+#ifndef math_opt_barrier
+#define math_opt_barrier(x) \
+({ __typeof (x) __x = x; __asm ("" : "+m" (__x)); __x; })
+#define math_force_eval(x) __asm __volatile ("" : : "m" (x))
+#endif
+
 #endif /* _MATH_PRIVATE_H_ */
diff --git a/math/s_nextafter.c b/math/s_nextafter.c
index 9c678b7..d2af52d 100644
--- a/math/s_nextafter.c
+++ b/math/s_nextafter.c
@@ -26,7 +26,7 @@ static char rcsid[] = "$NetBSD: s_nextafter.c,v 1.8 1995/05/10 20:47:58 jtc Exp
 #define nexttoward __internal_nexttoward
 
 #include <math.h>
-#include "math_private.h"
+#include <math_private.h>
 #include <float.h>
 
 #ifdef __STDC__
@@ -49,9 +49,12 @@ static char rcsid[] = "$NetBSD: s_nextafter.c,v 1.8 1995/05/10 20:47:58 jtc Exp
 	   return x+y;
 	if(x==y) return y;		/* x=y, return y */
 	if((ix|lx)==0) {			/* x == 0 */
+	    double u;
 	    INSERT_WORDS(x,hy&0x80000000,1);	/* return +-minsubnormal */
-	    y = x*x;
-	    if(y==x) return y; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u*u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(hx>hy||((hx==hy)&&(lx>ly))) {	/* x > y, x -= ulp */
@@ -74,15 +77,12 @@ static char rcsid[] = "$NetBSD: s_nextafter.c,v 1.8 1995/05/10 20:47:58 jtc Exp
 	if(hy>=0x7ff00000) {
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
-	    asm ("" : "=m"(x) : "m"(x));
+	    asm ("" : "+m"(x));
 	  return x;	/* overflow  */
 	}
-	if(hy<0x00100000) {		/* underflow */
-	    y = x*x;
-	    if(y!=x) {		/* raise underflow flag */
-	        INSERT_WORDS(y,hx,lx);
-		return y;
-	    }
+	if(hy<0x00100000) {
+	    double u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	INSERT_WORDS(x,hx,lx);
 	return x;
diff --git a/math/s_nexttowardf.c b/math/s_nexttowardf.c
index 1a10266..0494d1a 100644
--- a/math/s_nexttowardf.c
+++ b/math/s_nexttowardf.c
@@ -21,7 +21,7 @@
  */
 
 #include <math.h>
-#include "math_private.h"
+#include <math_private.h>
 #include <float.h>
 
 #ifdef __STDC__
@@ -45,10 +45,12 @@
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
-	    float x2;
+	    float u;
 	    SET_FLOAT_WORD(x,(u_int32_t)(hy&0x80000000)|1);/* return +-minsub*/
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x; /* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		 /* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(hy<0||(ix>>23)>(iy>>20)-0x380
@@ -70,15 +72,12 @@
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0)
 	    /* Force conversion to float.  */
-	    asm ("" : "=m"(x) : "m"(x));
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00800000) {		/* underflow */
-	    float x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-		SET_FLOAT_WORD(x2,hx);
-		return x2;
-	    }
+	if(hy<0x00800000) {
+	    float u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/i386/fpu/math_private.h b/sysdeps/i386/fpu/math_private.h
new file mode 100644
index 0000000..a426788
--- /dev/null
+++ b/sysdeps/i386/fpu/math_private.h
@@ -0,0 +1,18 @@
+#ifndef _MATH_PRIVATE_H
+
+#define math_opt_barrier(x) \
+({ __typeof(x) __x;					\
+   __asm ("" : "=t" (__x) : "0" (x));			\
+   __x; })
+#define math_force_eval(x) \
+do							\
+  {							\
+    if (sizeof (x) <= sizeof (double))			\
+      __asm __volatile ("" : : "m" (x));		\
+    else						\
+      __asm __volatile ("" : : "f" (x));		\
+  }							\
+while (0)
+
+#include <math/math_private.h>
+#endif
diff --git a/sysdeps/i386/fpu/s_nexttoward.c b/sysdeps/i386/fpu/s_nexttoward.c
index 2bd768e..9bd86a3 100644
--- a/sysdeps/i386/fpu/s_nexttoward.c
+++ b/sysdeps/i386/fpu/s_nexttoward.c
@@ -27,7 +27,8 @@ static char rcsid[] = "$NetBSD: $";
  */
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
+#include <float.h>
 
 #ifdef __STDC__
 	double __nexttoward(double x, long double y)
@@ -52,10 +53,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if((ix|lx)==0) {			/* x == 0 */
-	    double x2;
+	    double u;
 	    INSERT_WORDS(x,(esy&0x8000)<<16,1); /* return +-minsub */
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if (esy>=0x8000||((ix>>20)&0x7ff)>iy-0x3c00
@@ -85,16 +88,14 @@ static char rcsid[] = "$NetBSD: $";
 	hy = hx&0x7ff00000;
 	if(hy>=0x7ff00000) {
 	  x = x+x;	/* overflow  */
-	  /* Force conversion to double.  */
-	  asm ("" : "=m"(x) : "m"(x));
+	  if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
+	    /* Force conversion to double.  */
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00100000) {		/* underflow */
-	    double x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        INSERT_WORDS(x2,hx,lx);
-		return x2;
-	    }
+	if(hy<0x00100000) {
+	    double u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	INSERT_WORDS(x,hx,lx);
 	return x;
diff --git a/sysdeps/i386/fpu/s_nexttowardf.c b/sysdeps/i386/fpu/s_nexttowardf.c
index 3fbe53c..25f70e4 100644
--- a/sysdeps/i386/fpu/s_nexttowardf.c
+++ b/sysdeps/i386/fpu/s_nexttowardf.c
@@ -19,7 +19,8 @@ static char rcsid[] = "$NetBSD: $";
 #endif
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
+#include <float.h>
 
 #ifdef __STDC__
 	float __nexttowardf(float x, long double y)
@@ -44,10 +45,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
-	    float x2;
+	    float u;
 	    SET_FLOAT_WORD(x,((esy&0x8000)<<16)|1);/* return +-minsub*/
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(esy>=0x8000||((ix>>23)&0xff)>iy-0x3f80
@@ -69,16 +72,14 @@ static char rcsid[] = "$NetBSD: $";
 	hy = hx&0x7f800000;
 	if(hy>=0x7f800000) {
 	  x = x+x;	/* overflow  */
-	  /* Force conversion to float.  */
-	  asm ("" : "=m"(x) : "m"(x));
+	  if (FLT_EVAL_METHOD != 0)
+	    /* Force conversion to float.  */
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00800000) {		/* underflow */
-	    float x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        SET_FLOAT_WORD(x2,hx);
-		return x2;
-	    }
+	if(hy<0x00800000) {
+	    float u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/ieee754/flt-32/s_nextafterf.c b/sysdeps/ieee754/flt-32/s_nextafterf.c
index e1568e2..600c146 100644
--- a/sysdeps/ieee754/flt-32/s_nextafterf.c
+++ b/sysdeps/ieee754/flt-32/s_nextafterf.c
@@ -18,7 +18,7 @@ static char rcsid[] = "$NetBSD: s_nextafterf.c,v 1.4 1995/05/10 20:48:01 jtc Exp
 #endif
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
 #include <float.h>
 
 #ifdef __STDC__
@@ -40,9 +40,12 @@ static char rcsid[] = "$NetBSD: s_nextafterf.c,v 1.4 1995/05/10 20:48:01 jtc Exp
 	   return x+y;
 	if(x==y) return y;		/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
+	    float u;
 	    SET_FLOAT_WORD(x,(hy&0x80000000)|1);/* return +-minsubnormal */
-	    y = x*x;
-	    if(y==x) return y; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u*u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(hx>hy) {				/* x > y, x -= ulp */
@@ -61,15 +64,12 @@ static char rcsid[] = "$NetBSD: s_nextafterf.c,v 1.4 1995/05/10 20:48:01 jtc Exp
 	if(hy>=0x7f800000) {
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0)
-	    asm ("" : "=m"(x) : "m"(x));
+	    asm ("" : "+m"(x));
 	  return x;	/* overflow  */
 	}
-	if(hy<0x00800000) {		/* underflow */
-	    y = x*x;
-	    if(y!=x) {		/* raise underflow flag */
-	        SET_FLOAT_WORD(y,hx);
-		return y;
-	    }
+	if(hy<0x00800000) {
+	    float u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-128/s_nexttoward.c b/sysdeps/ieee754/ldbl-128/s_nexttoward.c
index 553e401..178505c 100644
--- a/sysdeps/ieee754/ldbl-128/s_nexttoward.c
+++ b/sysdeps/ieee754/ldbl-128/s_nexttoward.c
@@ -26,7 +26,7 @@ static char rcsid[] = "$NetBSD: $";
  */
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
 #include <float.h>
 
 #ifdef __STDC__
@@ -53,10 +53,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if((ix|lx)==0) {			/* x == 0 */
-	    double x2;
+	    double u;
 	    INSERT_WORDS(x,(u_int32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if (hy<0||(ix>>20)>(iy>>48)-0x3c00
@@ -87,16 +89,13 @@ static char rcsid[] = "$NetBSD: $";
 	if(hy>=0x7ff00000) {
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
-	    /* Force conversion to float.  */
-	    asm ("" : "=m"(x) : "m"(x));
+	    /* Force conversion to double.  */
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00100000) {		/* underflow */
-	    double x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        INSERT_WORDS(x2,hx,lx);
-		return x2;
-	    }
+	if(hy<0x00100000) {
+	    double u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	INSERT_WORDS(x,hx,lx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c b/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
index 3335100..e2f6521 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
@@ -26,7 +26,7 @@ static char rcsid[] = "$NetBSD: $";
  */
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
 #include <math_ldbl_opt.h>
 #include <float.h>
 
@@ -55,10 +55,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if((ix|lx)==0) {			/* x == 0 */
-	    double x2;
+	    double u;
 	    INSERT_WORDS(x,(u_int32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if (hy<0||(ix>>20)>(iy>>52)
@@ -89,16 +91,13 @@ static char rcsid[] = "$NetBSD: $";
 	if(hy>=0x7ff00000) {
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
-	    /* Force conversion to float.  */
-	    asm ("" : "=m"(x) : "m"(x));
+	    /* Force conversion to double.  */
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00100000) {		/* underflow */
-	    double x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        INSERT_WORDS(x2,hx,lx);
-		return x2;
-	    }
+	if(hy<0x00100000) {
+	    double u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	INSERT_WORDS(x,hx,lx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c b/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
index a9373ff..cf655fa 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
@@ -19,8 +19,9 @@ static char rcsid[] = "$NetBSD: $";
 #endif
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
 #include <math_ldbl_opt.h>
+#include <float.h>
 
 #ifdef __STDC__
 	float __nexttowardf(float x, long double y)
@@ -46,10 +47,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
-	    float x2;
+	    float u;
 	    SET_FLOAT_WORD(x,(u_int32_t)((hy>>32)&0x80000000)|1);/* return +-minsub*/
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(hy<0||(ix>>23)>(iy>>52)-0x380
@@ -69,13 +72,16 @@ static char rcsid[] = "$NetBSD: $";
 	    }
 	}
 	hy = hx&0x7f800000;
-	if(hy>=0x7f800000) return x+x;	/* overflow  */
+	if(hy>=0x7f800000) {
+	  x = x+x;	/* overflow  */
+	  if (FLT_EVAL_METHOD != 0)
+	    /* Force conversion to float.  */
+	    asm ("" : "+m"(x));
+	  return x;
+	}
 	if(hy<0x00800000) {		/* underflow */
-	    float x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        SET_FLOAT_WORD(x2,hx);
-		return x2;
-	    }
+	    float u = x*x;
+	    math_force_eval (u);	/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-96/s_nexttoward.c b/sysdeps/ieee754/ldbl-96/s_nexttoward.c
index 7945cb5..e304384 100644
--- a/sysdeps/ieee754/ldbl-96/s_nexttoward.c
+++ b/sysdeps/ieee754/ldbl-96/s_nexttoward.c
@@ -26,7 +26,7 @@ static char rcsid[] = "$NetBSD: $";
  */
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
 #include <float.h>
 
 #ifdef __STDC__
@@ -50,10 +50,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if((ix|lx)==0) {			/* x == 0 */
-	    double x2;
+	    double u;
 	    INSERT_WORDS(x,(esy&0x8000)<<16,1); /* return +-minsub */
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if (esy>=0x8000||((ix>>20)&0x7ff)>iy-0x3c00
@@ -84,16 +86,13 @@ static char rcsid[] = "$NetBSD: $";
 	if(hy>=0x7ff00000) {
 	  x = x+x;	/* overflow  */
 	  if (FLT_EVAL_METHOD != 0 && FLT_EVAL_METHOD != 1)
-	    /* Force conversion to float.  */
-	    asm ("" : "=m"(x) : "m"(x));
+	    /* Force conversion to double.  */
+	    asm ("" : "+m"(x));
 	  return x;
 	}
-	if(hy<0x00100000) {		/* underflow */
-	    double x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        INSERT_WORDS(x2,hx,lx);
-		return x2;
-	    }
+	if(hy<0x00100000) {
+	    double u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	INSERT_WORDS(x,hx,lx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-96/s_nexttowardf.c b/sysdeps/ieee754/ldbl-96/s_nexttowardf.c
index a1c38b5..6357975 100644
--- a/sysdeps/ieee754/ldbl-96/s_nexttowardf.c
+++ b/sysdeps/ieee754/ldbl-96/s_nexttowardf.c
@@ -18,7 +18,8 @@ static char rcsid[] = "$NetBSD: $";
 #endif
 
 #include "math.h"
-#include "math_private.h"
+#include <math_private.h>
+#include <float.h>
 
 #ifdef __STDC__
 	float __nexttowardf(float x, long double y)
@@ -41,10 +42,12 @@ static char rcsid[] = "$NetBSD: $";
 	   return x+y;
 	if((long double) x==y) return y;	/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
-	    float x2;
+	    float u;
 	    SET_FLOAT_WORD(x,((esy&0x8000)<<16)|1);/* return +-minsub*/
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x;	/* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(esy>=0x8000||((ix>>23)&0xff)>iy-0x3f80
@@ -64,13 +67,16 @@ static char rcsid[] = "$NetBSD: $";
 	    }
 	}
 	hy = hx&0x7f800000;
-	if(hy>=0x7f800000) return x+x;	/* overflow  */
-	if(hy<0x00800000) {		/* underflow */
-	    float x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-	        SET_FLOAT_WORD(x2,hx);
-		return x2;
-	    }
+	if(hy>=0x7f800000) {
+	  x = x+x;	/* overflow  */
+	  if (FLT_EVAL_METHOD != 0)
+	    /* Force conversion to float.  */
+	    asm ("" : "+m"(x));
+	  return x;
+	}
+	if(hy<0x00800000) {
+	    float u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c b/sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c
index d52526f..68027f2 100644
--- a/sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c
+++ b/sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c
@@ -20,7 +20,10 @@
  *   Special cases:
  */
 
+#include <math.h>
+#include <math_private.h>
 #include <math_ldbl_opt.h>
+#include <float.h>
 
 float __nldbl_nexttowardf(float x, double y);
 
@@ -39,10 +42,12 @@ float __nldbl_nexttowardf(float x, double y)
 	   return x+y;
 	if((double) x==y) return y;		/* x=y, return y */
 	if(ix==0) {				/* x == 0 */
-	    float x2;
+	    float u;
 	    SET_FLOAT_WORD(x,(u_int32_t)(hy&0x80000000)|1);/* return +-minsub*/
-	    x2 = x*x;
-	    if(x2==x) return x2; else return x; /* raise underflow flag */
+	    u = math_opt_barrier (x);
+	    u = u * u;
+	    math_force_eval (u);		/* raise underflow flag */
+	    return x;
 	}
 	if(hx>=0) {				/* x > 0 */
 	    if(hy<0||(ix>>23)>(iy>>20)-0x380
@@ -60,13 +65,16 @@ float __nldbl_nexttowardf(float x, double y)
 		hx += 1;
 	}
 	hy = hx&0x7f800000;
-	if(hy>=0x7f800000) return x+x;	/* overflow  */
-	if(hy<0x00800000) {		/* underflow */
-	    float x2 = x*x;
-	    if(x2!=x) {		/* raise underflow flag */
-		SET_FLOAT_WORD(x2,hx);
-		return x2;
-	    }
+	if(hy>=0x7f800000) {
+	  x = x+x;	/* overflow  */
+	  if (FLT_EVAL_METHOD != 0)
+	    /* Force conversion to float.  */
+	    asm ("" : "+m"(x));
+	  return x;
+	}
+	if(hy<0x00800000) {
+	    float u = x*x;			/* underflow */
+	    math_force_eval (u);		/* raise underflow flag */
 	}
 	SET_FLOAT_WORD(x,hx);
 	return x;
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
new file mode 100644
index 0000000..4febcbb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -0,0 +1,21 @@
+#ifndef _MATH_PRIVATE_H
+
+#define math_opt_barrier(x) \
+({ __typeof(x) __x;					\
+   if (sizeof (x) <= sizeof (double))			\
+     __asm ("" : "=x" (__x) : "0" (x));			\
+   else							\
+     __asm ("" : "=t" (__x) : "0" (x));			\
+   __x; })
+#define math_force_eval(x) \
+do							\
+  {							\
+    if (sizeof (x) <= sizeof (double))			\
+      __asm __volatile ("" : : "x" (x));		\
+    else						\
+      __asm __volatile ("" : : "f" (x));		\
+  }							\
+while (0)
+
+#include <math/math_private.h>
+#endif