diff options
author | Andrew Stubbs <ams@baylibre.com> | 2025-08-27 10:11:25 +0000 |
---|---|---|
committer | Jeff Johnston <jjohnstn@redhat.com> | 2025-08-27 17:45:03 -0400 |
commit | bd409f3c12e28e1464dec7fd0d45db30280f1e56 (patch) | |
tree | ff30331873ab9c9fa7b8cec47f461e63e25f5880 | |
parent | 2934367c3d8096da72cac29e3404333fc2d3152f (diff) | |
download | newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.zip newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.tar.gz newlib-bd409f3c12e28e1464dec7fd0d45db30280f1e56.tar.bz2 |
amdgcn, libm: fix vector ilogb bugs (bug 33272)
The vector ilogb routines, including the ones inlined into fmod, had a bug
in which the conditional masks were not properly applied, causing the value of
one lane to be affected by conditional choices of another lane. The problem
was not immediately obvious because all values were calculated correctly when
no lane contained a subnormal input.
The problem is fixed by proper use of VECTOR_COND_MOVE and VECTOR_WHILE.
-rw-r--r-- | newlib/libm/machine/amdgcn/amdgcn_veclib.h | 10 | ||||
-rw-r--r-- | newlib/libm/machine/amdgcn/v64df_fmod.c | 95 | ||||
-rw-r--r-- | newlib/libm/machine/amdgcn/v64df_ilogb.c | 22 | ||||
-rw-r--r-- | newlib/libm/machine/amdgcn/v64sf_fmod.c | 58 | ||||
-rw-r--r-- | newlib/libm/machine/amdgcn/v64sf_ilogb.c | 11 |
5 files changed, 99 insertions, 97 deletions
diff --git a/newlib/libm/machine/amdgcn/amdgcn_veclib.h b/newlib/libm/machine/amdgcn/amdgcn_veclib.h index 9e9d3eb..f7dadb7 100644 --- a/newlib/libm/machine/amdgcn/amdgcn_veclib.h +++ b/newlib/libm/machine/amdgcn/amdgcn_veclib.h @@ -267,7 +267,15 @@ do { \ __tmp; \ }) -#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \ +#define VECTOR_WHILE(cond, cond_var) \ +{ \ + __auto_type cond_var = __mask; \ + for (;;) { \ + cond_var &= __builtin_convertvector (cond, __typeof (cond_var)); \ + if (ALL_ZEROES_P (cond_var)) \ + break; + +#define VECTOR_WHILE2(cond, cond_var, prev_cond_var) \ { \ __auto_type cond_var = prev_cond_var; \ for (;;) { \ diff --git a/newlib/libm/machine/amdgcn/v64df_fmod.c b/newlib/libm/machine/amdgcn/v64df_fmod.c index 750546f..ba12577 100644 --- a/newlib/libm/machine/amdgcn/v64df_fmod.c +++ b/newlib/libm/machine/amdgcn/v64df_fmod.c @@ -61,17 +61,19 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y) v64si ix; VECTOR_IF (hx < 0x00100000, cond) // subnormal x VECTOR_IF2 (hx == 0, cond2, cond) - ix = VECTOR_INIT (-1043); - for (v64si i = __builtin_convertvector (lx, v64si); - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2); + v64si i = __builtin_convertvector (lx, v64si); + VECTOR_WHILE2 (i > 0, cond3, cond2); + VECTOR_COND_MOVE (ix, ix - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ELSE2 (cond2, cond) - ix = VECTOR_INIT (-1022); - for (v64si i = __builtin_convertvector (hx << 11, v64si); - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2); + v64si i = __builtin_convertvector (hx << 11, v64si); + VECTOR_WHILE2 (i > 0, cond3, cond2) + VECTOR_COND_MOVE (ix, ix - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ENDIF VECTOR_ELSE (cond) VECTOR_COND_MOVE (ix, (hx >> 20) - 1023, cond); @@ -81,17 +83,19 @@ 
DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y) v64si iy; VECTOR_IF (hy < 0x00100000, cond) // subnormal y VECTOR_IF2 (hy == 0, cond2, cond) - iy = VECTOR_INIT (-1043); - for (v64si i = __builtin_convertvector (ly, v64si); - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (iy, VECTOR_INIT (-1043), cond2); + v64si i = __builtin_convertvector (ly, v64si); + VECTOR_WHILE2 (i > 0, cond3, cond2); + VECTOR_COND_MOVE (iy, iy - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ELSE2 (cond2, cond) - iy = VECTOR_INIT (-1022); - for (v64si i = __builtin_convertvector (hy << 11, v64si); - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (iy, VECTOR_INIT (-1022), cond2); + v64si i = __builtin_convertvector (hy << 11, v64si); + VECTOR_WHILE2 (i > 0, cond3, cond2); + VECTOR_COND_MOVE (iy, iy - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ENDIF VECTOR_ELSE (cond) VECTOR_COND_MOVE (iy, (hy >> 20) - 1023, cond); @@ -130,29 +134,26 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y) /* fix point fmod */ v64si n = ix - iy; - v64si cond = n != 0; - while (!ALL_ZEROES_P (cond)) - { - hz = hx - hy; - lz = lx - ly; - VECTOR_IF2 (lx < ly, cond2, cond) - VECTOR_COND_MOVE (hz, hz - 1, cond2); - VECTOR_ENDIF - VECTOR_IF2 (hz < 0, cond2, cond) - VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2); - VECTOR_COND_MOVE (lx, lx + lx, cond2); - VECTOR_ELSE2 (cond2, cond) - VECTOR_IF2 ((hz | lz) == 0, cond3, cond2) // return sign(x)*0 - VECTOR_RETURN (zeroes, cond3); - VECTOR_ENDIF - VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2); - VECTOR_COND_MOVE (lx, lz + lz, cond2); + VECTOR_WHILE (n != 0, cond) + hz = hx - hy; + lz = lx - ly; + VECTOR_IF2 (lx < ly, cond2, cond) + VECTOR_COND_MOVE (hz, hz - 1, cond2); + VECTOR_ENDIF + VECTOR_IF2 (hz < 0, cond2, 
cond) + VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2); + VECTOR_COND_MOVE (lx, lx + lx, cond2); + VECTOR_ELSE2 (cond2, cond) + VECTOR_IF2 ((hz | lz) == 0, cond3, cond2) // return sign(x)*0 + VECTOR_RETURN (zeroes, cond3); VECTOR_ENDIF + VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2); + VECTOR_COND_MOVE (lx, lz + lz, cond2); + VECTOR_ENDIF - n += cond; // Active lanes should be -1 - cond &= (n != 0); - } + VECTOR_COND_MOVE (n, n - 1, cond); + VECTOR_ENDWHILE hz = hx - hy; lz = lx - ly; @@ -164,15 +165,11 @@ DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y) /* convert back to floating value and restore the sign */ VECTOR_RETURN (zeroes, (hx | lx) == 0); // return sign(x)*0 - cond = hx < 0x00100000; - while (!ALL_ZEROES_P (cond)) // normalize x - { - VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond); - VECTOR_COND_MOVE (lx, lx + lx, cond); - iy += cond; // Active lanes should be -1 - - cond &= (hx < 0x00100000); - } + VECTOR_WHILE (hx < 0x00100000, cond) // normalize x + VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond); + VECTOR_COND_MOVE (lx, lx + lx, cond); + VECTOR_COND_MOVE (iy, iy - 1, cond); + VECTOR_ENDWHILE VECTOR_IF (iy >= -1022, cond) // normalize output VECTOR_COND_MOVE (hx, (hx - 0x00100000) | ((iy + 1023) << 20), cond); INSERT_WORDS (x, hx | sx, lx, cond); diff --git a/newlib/libm/machine/amdgcn/v64df_ilogb.c b/newlib/libm/machine/amdgcn/v64df_ilogb.c index 56eb700..5e99322 100644 --- a/newlib/libm/machine/amdgcn/v64df_ilogb.c +++ b/newlib/libm/machine/amdgcn/v64df_ilogb.c @@ -36,17 +36,19 @@ DEF_VD_MATH_PRED (v64si, ilogb, v64df x) VECTOR_IF (hx < 0x00100000, cond) VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond & ((hx | lx) == 0)); // FP_ILOGB0 VECTOR_IF2 (hx == 0, cond2, cond) - ix = VECTOR_INIT (-1043); - for (v64si i = lx; - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2); + v64si 
i = lx; + VECTOR_WHILE2 (i > 0, cond3, cond2) + VECTOR_COND_MOVE (ix, ix - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ELSE2 (cond2, cond) - ix = VECTOR_INIT (-1022); - for (v64si i = (hx << 11); - !ALL_ZEROES_P (cond2 & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2); + v64si i = (hx << 11); + VECTOR_WHILE2 (i > 0, cond3, cond2) + VECTOR_COND_MOVE (ix, ix - 1, cond3); + VECTOR_COND_MOVE (i, i << 1, cond3); + VECTOR_ENDWHILE VECTOR_ENDIF VECTOR_RETURN (ix, cond); VECTOR_ENDIF diff --git a/newlib/libm/machine/amdgcn/v64sf_fmod.c b/newlib/libm/machine/amdgcn/v64sf_fmod.c index b62b819..e4ddfc2 100644 --- a/newlib/libm/machine/amdgcn/v64sf_fmod.c +++ b/newlib/libm/machine/amdgcn/v64sf_fmod.c @@ -57,11 +57,12 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y) /* determine ix = ilogb(x) */ v64si ix; VECTOR_IF (hx < 0x00800000, cond) // subnormal x - ix = VECTOR_INIT (-126); - for (v64si i = (hx << 8); - !ALL_ZEROES_P (cond & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-126), cond); + v64si i = hx << 8; + VECTOR_WHILE2 (i > 0, cond2, cond) + VECTOR_COND_MOVE (ix, ix - 1, cond2); + VECTOR_COND_MOVE (i, i << 1, cond2); + VECTOR_ENDWHILE VECTOR_ELSE (cond) VECTOR_COND_MOVE (ix, (hx >> 23) - 127, cond); VECTOR_ENDIF @@ -69,12 +70,12 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y) /* determine iy = ilogb(y) */ v64si iy; VECTOR_IF (hy < 0x00800000, cond) // subnormal y - iy = VECTOR_INIT (-126); - for (v64si i = (hy << 8); !ALL_ZEROES_P (cond & (i >= 0)); /* i <<= 1 */) - { - VECTOR_COND_MOVE (iy, iy - 1, cond & (i >= 0)); - VECTOR_COND_MOVE (i, i << 1, cond & (i >= 0)); - } + VECTOR_COND_MOVE (iy, VECTOR_INIT (-126), cond); + v64si i = (hy << 8); + VECTOR_WHILE2 (i >= 0, cond2, cond) + VECTOR_COND_MOVE (iy, iy - 1, cond2); + VECTOR_COND_MOVE (i, i << 1, cond2); + VECTOR_ENDWHILE VECTOR_ELSE (cond) 
VECTOR_COND_MOVE (iy, (hy >> 23) - 127, cond); VECTOR_ENDIF @@ -99,24 +100,21 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y) /* fix point fmod */ v64si n = ix - iy; - v64si cond = n != 0; - while (!ALL_ZEROES_P (cond)) - { - hz = hx - hy; - VECTOR_IF2 (hz < 0, cond2, cond) - VECTOR_COND_MOVE (hx, hx + hx, cond2); - VECTOR_ELSE2 (cond2, cond) - VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0 - VECTOR_RETURN (zeroes, cond3); - VECTOR_ELSE2 (cond3, cond2) - VECTOR_COND_MOVE (hx, hz + hz, cond2); - VECTOR_ENDIF + VECTOR_WHILE (n != 0, cond) + hz = hx - hy; + VECTOR_IF2 (hz < 0, cond2, cond) + VECTOR_COND_MOVE (hx, hx + hx, cond2); + VECTOR_ELSE2 (cond2, cond) + VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0 + VECTOR_RETURN (zeroes, cond3); + VECTOR_ELSE2 (cond3, cond2) + VECTOR_COND_MOVE (hx, hz + hz, cond2); VECTOR_ENDIF + VECTOR_ENDIF - n += cond; // Active lanes should be -1 - cond &= (n != 0); - } + n += cond; // Active lanes should be -1 + VECTOR_ENDWHILE hz = hx - hy; VECTOR_COND_MOVE (hx, hz, hz >= 0); @@ -124,14 +122,10 @@ DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y) /* convert back to floating value and restore the sign */ VECTOR_RETURN (zeroes, hx == 0); // return sign(x)*0 - cond = hx < 0x00800000; - while (!ALL_ZEROES_P (cond)) // normalize x - { + VECTOR_WHILE (hx < 0x00800000, cond) // normalize x VECTOR_COND_MOVE (hx, hx + hx, cond); iy += cond; // Active lanes should be -1 - - cond &= (hx < 0x00800000); - } + VECTOR_ENDWHILE VECTOR_IF (iy >= -126, cond) // normalize output VECTOR_COND_MOVE (hx, (hx - 0x00800000) | ((iy + 127) << 23), cond); SET_FLOAT_WORD (x, hx | sx, cond); diff --git a/newlib/libm/machine/amdgcn/v64sf_ilogb.c b/newlib/libm/machine/amdgcn/v64sf_ilogb.c index 2f2a7ca..ecaf996 100644 --- a/newlib/libm/machine/amdgcn/v64sf_ilogb.c +++ b/newlib/libm/machine/amdgcn/v64sf_ilogb.c @@ -38,11 +38,12 @@ DEF_VS_MATH_FUNC (v64si, ilogbf, v64sf x) VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond); // FP_ILOGB0 VECTOR_ENDIF 
VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (hx), cond) - ix = VECTOR_INIT (-126); - for (v64si i = (hx << 8); - !ALL_ZEROES_P (cond & (i > 0)); - i <<= 1) - VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0)); + VECTOR_COND_MOVE (ix, VECTOR_INIT (-126), cond); + v64si i = (hx << 8); + VECTOR_WHILE2 (i > 0, cond2, cond) + VECTOR_COND_MOVE (ix, ix - 1, cond2); + VECTOR_COND_MOVE (i, i << 1, cond2); + VECTOR_ENDWHILE VECTOR_RETURN (ix, cond); VECTOR_ELSEIF (~FLT_UWORD_IS_FINITE (hx), cond) VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), cond); |