From 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Fri, 29 Mar 2013 18:15:28 -0500 Subject: PowerPC: remove branch prediction from rint implementation The branch prediction hints actually hurt performance in this case. The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a general floating point function, the expected input is not bounded, so it is better to let the hardware handle the branches. --- sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 +++--- sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 +++--- sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 +++--- sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'sysdeps') diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S index f3cd036..f04055f 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S @@ -45,14 +45,14 @@ ENTRY (__rint) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S index 247dd4a..e0301af 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S @@ -41,14 +41,14 @@ ENTRY (__rintf) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= 
TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S index f333972..57e3759 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S index 26b0872..cb28ec7 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ -- cgit v1.1