diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2014-08-07 16:29:55 +0000 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2014-08-07 16:29:55 +0000 |
commit | 656b84c2ef525e3b69802c9057c5897e327b0332 (patch) | |
tree | b6cf594f11d613c778e7c4cbd063c316a42ce5b2 | |
parent | 538e9e454de75c5528d1a7c8f57ede9ccad39d00 (diff) | |
download | glibc-656b84c2ef525e3b69802c9057c5897e327b0332.zip glibc-656b84c2ef525e3b69802c9057c5897e327b0332.tar.gz glibc-656b84c2ef525e3b69802c9057c5897e327b0332.tar.bz2 |
This patch adds a new function, libc_feholdsetround_noex_aarch64_ctx, enabling
further optimization. libc_feholdsetround_aarch64_ctx now only needs to
read the FPCR in the typical case, avoiding a redundant FPSR read.
Performance results show a good improvement (5-10% on sin()) on cores with
expensive FPCR/FPSR instructions.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | sysdeps/aarch64/fpu/math_private.h | 30 |
2 files changed, 32 insertions, 3 deletions
@@ -1,5 +1,10 @@
 2014-08-07 Wilco Dijkstra <wdijkstr@arm.com>
 
+	* sysdeps/aarch64/fpu/math_private.h
+	(libc_feholdsetround_noex_aarch64_ctx): New function.
+
+2014-08-07 Wilco Dijkstra <wdijkstr@arm.com>
+
 	* sysdeps/arm/armv6/strcpy.S (strcpy):
 	Fix performance issue in misaligned cases.
 
diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 023c9d0..b13c030 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -228,12 +228,9 @@ static __always_inline void
 libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
 {
   fpu_control_t fpcr;
-  fpu_fpsr_t fpsr;
   int round;
 
   _FPU_GETCW (fpcr);
-  _FPU_GETFPSR (fpsr);
-  ctx->env.__fpsr = fpsr;
 
   /* Check whether rounding modes are different. */
   round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
@@ -264,6 +261,33 @@ libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
 #define libc_feresetroundl_ctx libc_feresetround_aarch64_ctx
 
 static __always_inline void
+libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  ctx->env.__fpsr = fpsr;
+
+  /* Check whether rounding modes are different. */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed. */
+  if (__glibc_unlikely (round != 0))
+    {
+      ctx->env.__fpcr = fpcr;
+      _FPU_SETCW (fpcr ^ round);
+    }
+}
+
+#define libc_feholdsetround_noex_ctx libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_aarch64_ctx
+
+static __always_inline void
 libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
 {
   /* Restore the rounding mode if updated. */