diff options
author | Pierre Blanchard <pierre.blanchard@arm.com> | 2025-08-20 17:41:50 +0000 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2025-08-20 17:45:21 +0000 |
commit | aac077645a645bba0d67f3250e82017c539d0f4b (patch) | |
tree | 4a142360f2cc2c6f34b6de8a5327a07f1ae1be35 | |
parent | 6f999af332c91035350390ef8af96388b8f4fd2c (diff) | |
download | glibc-aac077645a645bba0d67f3250e82017c539d0f4b.zip glibc-aac077645a645bba0d67f3250e82017c539d0f4b.tar.gz glibc-aac077645a645bba0d67f3250e82017c539d0f4b.tar.bz2 |
AArch64: Fix SVE powf routine [BZ #33299]
Fix a bug in the predicate logic introduced in the last change.
Also gain a slight performance improvement by relying on all-true
predicates during the conversion from single to double precision.
This fixes BZ #33299.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
-rw-r--r-- | sysdeps/aarch64/fpu/powf_sve.c | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c index 7046990..65e9bd2 100644 --- a/sysdeps/aarch64/fpu/powf_sve.c +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -223,15 +223,15 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, const svbool_t ptrue = svptrue_b64 (); /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two - * in order to perform core computation in double precision. */ + in order to perform core computation in double precision. */ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); - svfloat64_t y_lo - = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); - svfloat64_t y_hi - = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz))); - svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz))); + svfloat64_t y_lo = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi = svcvt_f64_x ( + ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz))); + svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); @@ -312,7 +312,7 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) (23 - V_POWF_EXP2_TABLE_BITS)); /* Compute core in extended precision and return intermediate ylogx results - * to handle cases of underflow and underflow in exp. */ + to handle cases of underflow and overflow in exp. */ svfloat32_t ylogx; svfloat32_t ret = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d); |