diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2015-08-05 14:57:37 +0100 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2015-08-05 16:24:01 +0100 |
commit | 3136eb7abd3e45a8622c0272181816c1a92e1f65 (patch) | |
tree | 5aeae1b79760c432d5983c725b656f5c0c8aee50 | |
parent | 782723d6d8c7b599c4fdbbd359a7bc57b25044ad (diff) | |
download | glibc-3136eb7abd3e45a8622c0272181816c1a92e1f65.zip glibc-3136eb7abd3e45a8622c0272181816c1a92e1f65.tar.gz glibc-3136eb7abd3e45a8622c0272181816c1a92e1f65.tar.bz2 |
Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
It uses the same logic as the ARM version. In the common case, where envp is
neither FE_DFL_ENV nor FE_NOMASK_ENV, one FPSR read and one FPCR read are
removed. For FE_DFL_ENV and FE_NOMASK_ENV, an FPCR read is avoided when the
FPCR does not change.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | sysdeps/aarch64/fpu/fesetenv.c | 40 |
2 files changed, 28 insertions, 17 deletions
```diff
@@ -1,3 +1,8 @@
+2015-08-05  Wilco Dijkstra  <wdijkstr@arm.com>
+
+        * sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
+        Optimize to reduce FPCR/FPSR accesses.
+
 2015-08-05  H.J. Lu  <hongjiu.lu@intel.com>
 
         * locale/loadarchive.c (_nl_archive_subfreeres): Also check
diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
index f47115f..bd56187 100644
--- a/sysdeps/aarch64/fpu/fesetenv.c
+++ b/sysdeps/aarch64/fpu/fesetenv.c
@@ -29,8 +29,20 @@ __fesetenv (const fenv_t *envp)
   fpu_fpsr_t fpsr_new;
 
   _FPU_GETCW (fpcr);
-  _FPU_GETFPSR (fpsr);
 
+  if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+    {
+      /* The new FPCR/FPSR are valid, so don't merge the reserved flags.  */
+      fpcr_new = envp->__fpcr;
+
+      if (fpcr != fpcr_new)
+        _FPU_SETCW (fpcr_new);
+
+      _FPU_SETFPSR (envp->__fpsr);
+      return 0;
+    }
+
+  _FPU_GETFPSR (fpsr);
   fpcr_new = fpcr & _FPU_RESERVED;
   fpsr_new = fpsr & _FPU_FPSR_RESERVED;
 
@@ -39,31 +51,25 @@ __fesetenv (const fenv_t *envp)
       fpcr_new |= _FPU_DEFAULT;
       fpsr_new |= _FPU_FPSR_DEFAULT;
     }
-  else if (envp == FE_NOMASK_ENV)
+  else
     {
       fpcr_new |= _FPU_FPCR_IEEE;
       fpsr_new |= _FPU_FPSR_IEEE;
     }
-  else
-    {
-      fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
-      fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
-    }
 
-  if (fpsr != fpsr_new)
-    _FPU_SETFPSR (fpsr_new);
+  _FPU_SETFPSR (fpsr_new);
 
   if (fpcr != fpcr_new)
-    _FPU_SETCW (fpcr_new);
+    {
+      _FPU_SETCW (fpcr_new);
 
-  /* Trapping exceptions are optional in AArch64 the relevant enable
-     bits in FPCR are RES0 hence the absence of support can be
-     detected by reading back the FPCR and comparing with the required
-     value.  */
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+         bits in FPCR are RES0 hence the absence of support can be detected
+         by reading back the FPCR and comparing with the required value.  */
+      _FPU_GETCW (updated_fpcr);
 
-  _FPU_GETCW (updated_fpcr);
-  if ((updated_fpcr & fpcr_new) != fpcr_new)
-    return 1;
+      return fpcr_new & ~updated_fpcr;
+    }
 
   return 0;
 }
```