aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Stubbs <ams@baylibre.com> 2025-08-27 10:11:25 +0000
committer Jeff Johnston <jjohnstn@redhat.com> 2025-08-27 17:45:03 -0400
commit bd409f3c12e28e1464dec7fd0d45db30280f1e56 (patch)
tree ff30331873ab9c9fa7b8cec47f461e63e25f5880
parent 2934367c3d8096da72cac29e3404333fc2d3152f (diff)
download newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.zip
newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.tar.gz
newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.tar.bz2
amdgcn, libm: fix vector ilogb bugs (bug 33272)
The vector ilogb routines, including the ones inlined into fmod, had a bug in which the conditional masks were not properly applied, causing the value of one lane to be affected by conditional choices of another lane. The problem was not immediately obvious because all values were calculated correctly when no lane contained a subnormal input. The problem is fixed by using VECTOR_COND_MOVE and VECTOR_WHILE properly.
-rw-r--r-- newlib/libm/machine/amdgcn/amdgcn_veclib.h 10
-rw-r--r-- newlib/libm/machine/amdgcn/v64df_fmod.c 95
-rw-r--r-- newlib/libm/machine/amdgcn/v64df_ilogb.c 22
-rw-r--r-- newlib/libm/machine/amdgcn/v64sf_fmod.c 58
-rw-r--r-- newlib/libm/machine/amdgcn/v64sf_ilogb.c 11
5 files changed, 99 insertions, 97 deletions
diff --git a/newlib/libm/machine/amdgcn/amdgcn_veclib.h b/newlib/libm/machine/amdgcn/amdgcn_veclib.h
index 9e9d3eb..f7dadb7 100644
--- a/newlib/libm/machine/amdgcn/amdgcn_veclib.h
+++ b/newlib/libm/machine/amdgcn/amdgcn_veclib.h
@@ -267,7 +267,15 @@ do { \
__tmp; \
})
-#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
+#define VECTOR_WHILE(cond, cond_var) \
+{ \
+ __auto_type cond_var = __mask; \
+ for (;;) { \
+ cond_var &= __builtin_convertvector (cond, __typeof (cond_var)); \
+ if (ALL_ZEROES_P (cond_var)) \
+ break;
+
+#define VECTOR_WHILE2(cond, cond_var, prev_cond_var) \
{ \
__auto_type cond_var = prev_cond_var; \
for (;;) { \
diff --git a/newlib/libm/machine/amdgcn/v64df_fmod.c b/newlib/libm/machine/amdgcn/v64df_fmod.c
index 750546f..ba12577 100644
--- a/newlib/libm/machine/amdgcn/v64df_fmod.c
+++ b/newlib/libm/machine/amdgcn/v64df_fmod.c
@@ -61,17 +61,19 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
v64si ix;
VECTOR_IF (hx < 0x00100000, cond) // subnormal x
VECTOR_IF2 (hx == 0, cond2, cond)
- ix = VECTOR_INIT (-1043);
- for (v64si i = __builtin_convertvector (lx, v64si);
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2);
+ v64si i = __builtin_convertvector (lx, v64si);
+ VECTOR_WHILE2 (i > 0, cond3, cond2);
+ VECTOR_COND_MOVE (ix, ix - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ELSE2 (cond2, cond)
- ix = VECTOR_INIT (-1022);
- for (v64si i = __builtin_convertvector (hx << 11, v64si);
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2);
+ v64si i = __builtin_convertvector (hx << 11, v64si);
+ VECTOR_WHILE2 (i > 0, cond3, cond2)
+ VECTOR_COND_MOVE (ix, ix - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ENDIF
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (ix, (hx >> 20) - 1023, cond);
@@ -81,17 +83,19 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
v64si iy;
VECTOR_IF (hy < 0x00100000, cond) // subnormal y
VECTOR_IF2 (hy == 0, cond2, cond)
- iy = VECTOR_INIT (-1043);
- for (v64si i = __builtin_convertvector (ly, v64si);
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (iy, VECTOR_INIT (-1043), cond2);
+ v64si i = __builtin_convertvector (ly, v64si);
+ VECTOR_WHILE2 (i > 0, cond3, cond2);
+ VECTOR_COND_MOVE (iy, iy - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ELSE2 (cond2, cond)
- iy = VECTOR_INIT (-1022);
- for (v64si i = __builtin_convertvector (hy << 11, v64si);
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (iy, VECTOR_INIT (-1022), cond2);
+ v64si i = __builtin_convertvector (hy << 11, v64si);
+ VECTOR_WHILE2 (i > 0, cond3, cond2);
+ VECTOR_COND_MOVE (iy, iy - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ENDIF
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (iy, (hy >> 20) - 1023, cond);
@@ -130,29 +134,26 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
/* fix point fmod */
v64si n = ix - iy;
- v64si cond = n != 0;
- while (!ALL_ZEROES_P (cond))
- {
- hz = hx - hy;
- lz = lx - ly;
- VECTOR_IF2 (lx < ly, cond2, cond)
- VECTOR_COND_MOVE (hz, hz - 1, cond2);
- VECTOR_ENDIF
- VECTOR_IF2 (hz < 0, cond2, cond)
- VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2);
- VECTOR_COND_MOVE (lx, lx + lx, cond2);
- VECTOR_ELSE2 (cond2, cond)
- VECTOR_IF2 ((hz | lz) == 0, cond3, cond2) // return sign(x)*0
- VECTOR_RETURN (zeroes, cond3);
- VECTOR_ENDIF
- VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2);
- VECTOR_COND_MOVE (lx, lz + lz, cond2);
+ VECTOR_WHILE (n != 0, cond)
+ hz = hx - hy;
+ lz = lx - ly;
+ VECTOR_IF2 (lx < ly, cond2, cond)
+ VECTOR_COND_MOVE (hz, hz - 1, cond2);
+ VECTOR_ENDIF
+ VECTOR_IF2 (hz < 0, cond2, cond)
+ VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2);
+ VECTOR_COND_MOVE (lx, lx + lx, cond2);
+ VECTOR_ELSE2 (cond2, cond)
+ VECTOR_IF2 ((hz | lz) == 0, cond3, cond2) // return sign(x)*0
+ VECTOR_RETURN (zeroes, cond3);
VECTOR_ENDIF
+ VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2);
+ VECTOR_COND_MOVE (lx, lz + lz, cond2);
+ VECTOR_ENDIF
- n += cond; // Active lanes should be -1
- cond &= (n != 0);
- }
+ VECTOR_COND_MOVE (n, n - 1, cond);
+ VECTOR_ENDWHILE
hz = hx - hy;
lz = lx - ly;
@@ -164,15 +165,11 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
/* convert back to floating value and restore the sign */
VECTOR_RETURN (zeroes, (hx | lx) == 0); // return sign(x)*0
- cond = hx < 0x00100000;
- while (!ALL_ZEROES_P (cond)) // normalize x
- {
- VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond);
- VECTOR_COND_MOVE (lx, lx + lx, cond);
- iy += cond; // Active lanes should be -1
-
- cond &= (hx < 0x00100000);
- }
+ VECTOR_WHILE (hx < 0x00100000, cond) // normalize x
+ VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond);
+ VECTOR_COND_MOVE (lx, lx + lx, cond);
+ VECTOR_COND_MOVE (iy, iy - 1, cond);
+ VECTOR_ENDWHILE
VECTOR_IF (iy >= -1022, cond) // normalize output
VECTOR_COND_MOVE (hx, (hx - 0x00100000) | ((iy + 1023) << 20), cond);
INSERT_WORDS (x, hx | sx, lx, cond);
diff --git a/newlib/libm/machine/amdgcn/v64df_ilogb.c b/newlib/libm/machine/amdgcn/v64df_ilogb.c
index 56eb700..5e99322 100644
--- a/newlib/libm/machine/amdgcn/v64df_ilogb.c
+++ b/newlib/libm/machine/amdgcn/v64df_ilogb.c
@@ -36,17 +36,19 @@ DEF_VD_MATH_PRED (v64si, ilogb, v64df x)
VECTOR_IF (hx < 0x00100000, cond)
VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond & ((hx | lx) == 0)); // FP_ILOGB0
VECTOR_IF2 (hx == 0, cond2, cond)
- ix = VECTOR_INIT (-1043);
- for (v64si i = lx;
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2);
+ v64si i = lx;
+ VECTOR_WHILE2 (i > 0, cond3, cond2)
+ VECTOR_COND_MOVE (ix, ix - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ELSE2 (cond2, cond)
- ix = VECTOR_INIT (-1022);
- for (v64si i = (hx << 11);
- !ALL_ZEROES_P (cond2 & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2);
+ v64si i = (hx << 11);
+ VECTOR_WHILE2 (i > 0, cond3, cond2)
+ VECTOR_COND_MOVE (ix, ix - 1, cond3);
+ VECTOR_COND_MOVE (i, i << 1, cond3);
+ VECTOR_ENDWHILE
VECTOR_ENDIF
VECTOR_RETURN (ix, cond);
VECTOR_ENDIF
diff --git a/newlib/libm/machine/amdgcn/v64sf_fmod.c b/newlib/libm/machine/amdgcn/v64sf_fmod.c
index b62b819..e4ddfc2 100644
--- a/newlib/libm/machine/amdgcn/v64sf_fmod.c
+++ b/newlib/libm/machine/amdgcn/v64sf_fmod.c
@@ -57,11 +57,12 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
/* determine ix = ilogb(x) */
v64si ix;
VECTOR_IF (hx < 0x00800000, cond) // subnormal x
- ix = VECTOR_INIT (-126);
- for (v64si i = (hx << 8);
- !ALL_ZEROES_P (cond & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-126), cond);
+ v64si i = hx << 8;
+ VECTOR_WHILE2 (i > 0, cond2, cond)
+ VECTOR_COND_MOVE (ix, ix - 1, cond2);
+ VECTOR_COND_MOVE (i, i << 1, cond2);
+ VECTOR_ENDWHILE
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (ix, (hx >> 23) - 127, cond);
VECTOR_ENDIF
@@ -69,12 +70,12 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
/* determine iy = ilogb(y) */
v64si iy;
VECTOR_IF (hy < 0x00800000, cond) // subnormal y
- iy = VECTOR_INIT (-126);
- for (v64si i = (hy << 8); !ALL_ZEROES_P (cond & (i >= 0)); /* i <<= 1 */)
- {
- VECTOR_COND_MOVE (iy, iy - 1, cond & (i >= 0));
- VECTOR_COND_MOVE (i, i << 1, cond & (i >= 0));
- }
+ VECTOR_COND_MOVE (iy, VECTOR_INIT (-126), cond);
+ v64si i = (hy << 8);
+ VECTOR_WHILE2 (i >= 0, cond2, cond)
+ VECTOR_COND_MOVE (iy, iy - 1, cond2);
+ VECTOR_COND_MOVE (i, i << 1, cond2);
+ VECTOR_ENDWHILE
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (iy, (hy >> 23) - 127, cond);
VECTOR_ENDIF
@@ -99,24 +100,21 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
/* fix point fmod */
v64si n = ix - iy;
- v64si cond = n != 0;
- while (!ALL_ZEROES_P (cond))
- {
- hz = hx - hy;
- VECTOR_IF2 (hz < 0, cond2, cond)
- VECTOR_COND_MOVE (hx, hx + hx, cond2);
- VECTOR_ELSE2 (cond2, cond)
- VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0
- VECTOR_RETURN (zeroes, cond3);
- VECTOR_ELSE2 (cond3, cond2)
- VECTOR_COND_MOVE (hx, hz + hz, cond2);
- VECTOR_ENDIF
+ VECTOR_WHILE (n != 0, cond)
+ hz = hx - hy;
+ VECTOR_IF2 (hz < 0, cond2, cond)
+ VECTOR_COND_MOVE (hx, hx + hx, cond2);
+ VECTOR_ELSE2 (cond2, cond)
+ VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0
+ VECTOR_RETURN (zeroes, cond3);
+ VECTOR_ELSE2 (cond3, cond2)
+ VECTOR_COND_MOVE (hx, hz + hz, cond2);
VECTOR_ENDIF
+ VECTOR_ENDIF
- n += cond; // Active lanes should be -1
- cond &= (n != 0);
- }
+ n += cond; // Active lanes should be -1
+ VECTOR_ENDWHILE
hz = hx - hy;
VECTOR_COND_MOVE (hx, hz, hz >= 0);
@@ -124,14 +122,10 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
/* convert back to floating value and restore the sign */
VECTOR_RETURN (zeroes, hx == 0); // return sign(x)*0
- cond = hx < 0x00800000;
- while (!ALL_ZEROES_P (cond)) // normalize x
- {
+ VECTOR_WHILE (hx < 0x00800000, cond) // normalize x
VECTOR_COND_MOVE (hx, hx + hx, cond);
iy += cond; // Active lanes should be -1
-
- cond &= (hx < 0x00800000);
- }
+ VECTOR_ENDWHILE
VECTOR_IF (iy >= -126, cond) // normalize output
VECTOR_COND_MOVE (hx, (hx - 0x00800000) | ((iy + 127) << 23), cond);
SET_FLOAT_WORD (x, hx | sx, cond);
diff --git a/newlib/libm/machine/amdgcn/v64sf_ilogb.c b/newlib/libm/machine/amdgcn/v64sf_ilogb.c
index 2f2a7ca..ecaf996 100644
--- a/newlib/libm/machine/amdgcn/v64sf_ilogb.c
+++ b/newlib/libm/machine/amdgcn/v64sf_ilogb.c
@@ -38,11 +38,12 @@ DEF_VS_MATH_FUNC (v64si, ilogbf, v64sf x)
VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond); // FP_ILOGB0
VECTOR_ENDIF
VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (hx), cond)
- ix = VECTOR_INIT (-126);
- for (v64si i = (hx << 8);
- !ALL_ZEROES_P (cond & (i > 0));
- i <<= 1)
- VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0));
+ VECTOR_COND_MOVE (ix, VECTOR_INIT (-126), cond);
+ v64si i = (hx << 8);
+ VECTOR_WHILE2 (i > 0, cond2, cond)
+ VECTOR_COND_MOVE (ix, ix - 1, cond2);
+ VECTOR_COND_MOVE (i, i << 1, cond2);
+ VECTOR_ENDWHILE
VECTOR_RETURN (ix, cond);
VECTOR_ELSEIF (~FLT_UWORD_IS_FINITE (hx), cond)
VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), cond);