aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ieee754/dbl-64/uexp.h
diff options
context:
space:
mode:
author: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-02-12 18:16:03 +0000
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-09-05 16:22:00 +0100
commit: e70c17682518fab2fad164fecf73341443bc2ed3 (patch)
tree: b51780512cbcf9b0ded12c313b7e034bf7162273 /sysdeps/ieee754/dbl-64/uexp.h
parent: b7cdc2aeb16c07fd9e6ec59f96f862b7fe2d3fdd (diff)
download: glibc-e70c17682518fab2fad164fecf73341443bc2ed3.zip
          glibc-e70c17682518fab2fad164fecf73341443bc2ed3.tar.gz
          glibc-e70c17682518fab2fad164fecf73341443bc2ed3.tar.bz2
Add new exp and exp2 implementations
Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs.

The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes.

The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h.

The __exp1 code uses the same algorithm, so the error bound of pow increased a bit.

New double precision error handling code was added following the style of the single precision error handling code.

Improvements on Cortex-A72 compared to current glibc master:

exp throughput: 1.61x in [-9.9 9.9]
exp latency: 1.53x in [-9.9 9.9]
exp throughput: 1.13x in [0.5 1]
exp latency: 1.30x in [0.5 1]
exp2 throughput: 2.03x in [-9.9 9.9]
exp2 latency: 1.64x in [-9.9 9.9]

For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less.

Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches).

* NEWS: Mention exp and exp2 improvements.
* math/Makefile (libm-support): Remove t_exp.
(type-double-routines): Add math_err and e_exp_data.
* sysdeps/aarch64/libm-test-ulps: Update.
* sysdeps/arm/libm-test-ulps: Update.
* sysdeps/i386/fpu/e_exp_data.c: New file.
* sysdeps/i386/fpu/math_err.c: New file.
* sysdeps/i386/fpu/t_exp.c: Remove.
* sysdeps/ia64/fpu/e_exp_data.c: New file.
* sysdeps/ia64/fpu/math_err.c: New file.
* sysdeps/ia64/fpu/t_exp.c: Remove.
* sysdeps/ieee754/dbl-64/e_exp.c: Rewrite.
* sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite.
* sysdeps/ieee754/dbl-64/e_exp_data.c: New file.
* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound.
* sysdeps/ieee754/dbl-64/eexp.tbl: Remove.
* sysdeps/ieee754/dbl-64/math_config.h: New file.
* sysdeps/ieee754/dbl-64/math_err.c: New file.
* sysdeps/ieee754/dbl-64/t_exp.c: Remove.
* sysdeps/ieee754/dbl-64/t_exp2.h: Remove.
* sysdeps/ieee754/dbl-64/uexp.h: Remove.
* sysdeps/ieee754/dbl-64/uexp.tbl: Remove.
* sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file.
* sysdeps/m68k/m680x0/fpu/math_err.c: New file.
* sysdeps/m68k/m680x0/fpu/t_exp.c: Remove.
* sysdeps/powerpc/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Update.
Diffstat (limited to 'sysdeps/ieee754/dbl-64/uexp.h')
-rw-r--r-- sysdeps/ieee754/dbl-64/uexp.h 68
1 files changed, 0 insertions, 68 deletions
diff --git a/sysdeps/ieee754/dbl-64/uexp.h b/sysdeps/ieee754/dbl-64/uexp.h
deleted file mode 100644
index 64ab2c8..0000000
--- a/sysdeps/ieee754/dbl-64/uexp.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * Written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/******************************************************************/
-/* */
-/* MODULE_NAME:uexp.h */
-/* */
-/* common data and variables prototype and definition */
-/******************************************************************/
-
-#ifndef UEXP_H
-#define UEXP_H
-
-#include "mydefs.h"
-
-const static double zero = 0.0, hhuge = 1.0e300, tiny = 1.0e-300;
-const static int4 bigint = 0x40862002,
- badint = 0x40876000,smallint = 0x3C8fffff;
-const static int4 hugeint = 0x7FFFFFFF, infint = 0x7ff00000;
-
-#ifdef BIG_ENDI
-const static mynumber inf = {{0x7FF00000, 0}}; /* inf */
-const static mynumber t256 = {{0x4ff00000, 0}}; /* 2^256 */
-
-const static mynumber ln_two1 = {{0x3FE62E42, 0xFEFA3800}};/*0.69314718055989033 */
-const static mynumber ln_two2 = {{0x3D2EF357, 0x93C76730}};/*5.4979230187083712e-14*/
-const static mynumber log2e = {{0x3FF71547, 0x652B82FE}};/* 1.4426950408889634 */
-
-const static mynumber p2 = {{0x3FE00000, 0x000004DC}};/* 0.50000000000013811 */
-const static mynumber p3 = {{0x3FC55555, 0x55555A0F}};/* 0.16666666666670024 */
-
-const static mynumber three33 = {{0x42180000, 0}}; /* 25769803776 */
-const static mynumber three51 = {{0x43380000, 0}}; /* 6755399441055744 */
-
-#else
-#ifdef LITTLE_ENDI
- const static mynumber inf = {{0, 0x7FF00000}}; /* inf */
- const static mynumber t256 = {{0, 0x4ff00000}}; /* 2^256 */
-
- const static mynumber ln_two1 = {{0xFEFA3800, 0x3FE62E42}};/*0.69314718055989033 */
- const static mynumber ln_two2 = {{0x93C76730, 0x3D2EF357}};/*5.4979230187083712e-14*/
- const static mynumber log2e = {{0x652B82FE, 0x3FF71547}};/* 1.4426950408889634 */
-
- const static mynumber p2 = {{0x000004DC, 0x3FE00000}};/* 0.50000000000013811 */
- const static mynumber p3 = {{0x55555A0F, 0x3FC55555}};/* 0.16666666666670024 */
-
- const static mynumber three33 = {{0, 0x42180000}}; /* 25769803776 */
- const static mynumber three51 = {{0, 0x43380000}}; /* 6755399441055744 */
-
-#endif
-#endif
-#endif