diff options
author | Joseph Myers <joseph@codesourcery.com> | 2014-06-30 17:37:10 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2014-06-30 17:37:10 +0000 |
commit | 3a6e988706d1ce2c88c933619d1f312098698ecd (patch) | |
tree | 90ec79e99524bb80b7948755a24273ccf824da78 | |
parent | 9c37ec0b8929c57ea83230507d7742ca9a8888bc (diff) | |
download | glibc-3a6e988706d1ce2c88c933619d1f312098698ecd.zip glibc-3a6e988706d1ce2c88c933619d1f312098698ecd.tar.gz glibc-3a6e988706d1ce2c88c933619d1f312098698ecd.tar.bz2 |
Rename soft-fp op-[1248].h variables to avoid risk of shadowing.
Continuing the soft-fp variable renaming from
<https://sourceware.org/ml/libc-alpha/2014-06/msg00434.html> to avoid
shadowing if two macros happen to use the same variable name and that
variable is involved in an argument one of those macros passes to
another, this patch renames variables in op-[1248].h. (The two
patches are to different files and are independent of each other.)
Tested for powerpc32 (soft-float) and mips64 that this makes no change
to the disassembly of installed shared libraries.
* soft-fp/op-1.h (_FP_UNPACK_RAW_1): Rename local variables to
include macro name.
(_FP_UNPACK_RAW_1_P): Likewise.
(_FP_PACK_RAW_1): Likewise.
(_FP_PACK_RAW_1_P): Likewise.
(_FP_MUL_MEAT_1_wide): Likewise.
(_FP_MUL_MEAT_DW_1_hard): Likewise.
(_FP_MUL_MEAT_1_hard): Likewise.
(_FP_DIV_MEAT_1_imm): Likewise.
(_FP_DIV_MEAT_1_udiv_norm): Likewise.
(_FP_DIV_MEAT_1_udiv): Likewise.
* soft-fp/op-2.h (__FP_FRAC_DEC_2): Likewise.
(_FP_UNPACK_RAW_2): Likewise.
(_FP_UNPACK_RAW_2_P): Likewise.
(_FP_PACK_RAW_2): Likewise.
(_FP_PACK_RAW_2_P): Likewise.
(_FP_MUL_MEAT_DW_2_wide): Likewise.
(_FP_MUL_MEAT_2_wide): Likewise.
(_FP_MUL_MEAT_DW_2_wide_3mul): Likewise.
(_FP_MUL_MEAT_2_wide_3mul): Likewise.
(_FP_MUL_MEAT_DW_2_gmp): Likewise.
(_FP_MUL_MEAT_2_gmp): Likewise.
(_FP_DIV_MEAT_2_udiv): Likewise.
* soft-fp/op-4.h (_FP_FRAC_SLL_4): Likewise.
(_FP_FRAC_SRL_4): Likewise.
(_FP_FRAC_SRST_4): Likewise.
(_FP_FRAC_SRS_4): Likewise.
(_FP_UNPACK_RAW_4): Likewise.
(_FP_UNPACK_RAW_4_P): Likewise.
(_FP_PACK_RAW_4): Likewise.
(_FP_PACK_RAW_4_P): Likewise.
(_FP_MUL_MEAT_DW_4_wide): Likewise.
(_FP_MUL_MEAT_4_wide): Likewise.
(_FP_MUL_MEAT_4_gmp): Likewise.
(umul_ppppmnnn): Likewise.
(_FP_DIV_MEAT_4_udiv): Likewise.
(__FP_FRAC_ADD_4): Likewise.
(__FP_FRAC_SUB_3): Likewise.
(__FP_FRAC_SUB_4): Likewise.
(__FP_FRAC_DEC_3): Likewise.
(__FP_FRAC_DEC_4): Likewise.
(__FP_FRAC_ADDI_4): Likewise.
* soft-fp/op-8.h (_FP_FRAC_SLL_8): Likewise.
(_FP_FRAC_SRL_8): Likewise.
(_FP_FRAC_SRS_8): Likewise.
-rw-r--r-- | ChangeLog | 46 | ||||
-rw-r--r-- | soft-fp/op-1.h | 218 | ||||
-rw-r--r-- | soft-fp/op-2.h | 298 | ||||
-rw-r--r-- | soft-fp/op-4.h | 574 | ||||
-rw-r--r-- | soft-fp/op-8.h | 146 |
5 files changed, 772 insertions, 510 deletions
@@ -1,5 +1,51 @@ 2014-06-30 Joseph Myers <joseph@codesourcery.com> + * soft-fp/op-1.h (_FP_UNPACK_RAW_1): Rename local variables to + include macro name. + (_FP_UNPACK_RAW_1_P): Likewise. + (_FP_PACK_RAW_1): Likewise. + (_FP_PACK_RAW_1_P): Likewise. + (_FP_MUL_MEAT_1_wide): Likewise. + (_FP_MUL_MEAT_DW_1_hard): Likewise. + (_FP_MUL_MEAT_1_hard): Likewise. + (_FP_DIV_MEAT_1_imm): Likewise. + (_FP_DIV_MEAT_1_udiv_norm): Likewise. + (_FP_DIV_MEAT_1_udiv): Likewise. + * soft-fp/op-2.h (__FP_FRAC_DEC_2): Likewise. + (_FP_UNPACK_RAW_2): Likewise. + (_FP_UNPACK_RAW_2_P): Likewise. + (_FP_PACK_RAW_2): Likewise. + (_FP_PACK_RAW_2_P): Likewise. + (_FP_MUL_MEAT_DW_2_wide): Likewise. + (_FP_MUL_MEAT_2_wide): Likewise. + (_FP_MUL_MEAT_DW_2_wide_3mul): Likewise. + (_FP_MUL_MEAT_2_wide_3mul): Likewise. + (_FP_MUL_MEAT_DW_2_gmp): Likewise. + (_FP_MUL_MEAT_2_gmp): Likewise. + (_FP_DIV_MEAT_2_udiv): Likewise. + * soft-fp/op-4.h (_FP_FRAC_SLL_4): Likewise. + (_FP_FRAC_SRL_4): Likewise. + (_FP_FRAC_SRST_4): Likewise. + (_FP_FRAC_SRS_4): Likewise. + (_FP_UNPACK_RAW_4): Likewise. + (_FP_UNPACK_RAW_4_P): Likewise. + (_FP_PACK_RAW_4): Likewise. + (_FP_PACK_RAW_4_P): Likewise. + (_FP_MUL_MEAT_DW_4_wide): Likewise. + (_FP_MUL_MEAT_4_wide): Likewise. + (_FP_MUL_MEAT_4_gmp): Likewise. + (umul_ppppmnnn): Likewise. + (_FP_DIV_MEAT_4_udiv): Likewise. + (__FP_FRAC_ADD_4): Likewise. + (__FP_FRAC_SUB_3): Likewise. + (__FP_FRAC_SUB_4): Likewise. + (__FP_FRAC_DEC_3): Likewise. + (__FP_FRAC_DEC_4): Likewise. + (__FP_FRAC_ADDI_4): Likewise. + * soft-fp/op-8.h (_FP_FRAC_SLL_8): Likewise. + (_FP_FRAC_SRL_8): Likewise. + (_FP_FRAC_SRS_8): Likewise. + * soft-fp/extended.h (FP_UNPACK_RAW_E): Rename local variables to include macro name. (FP_UNPACK_RAW_EP): Likewise. diff --git a/soft-fp/op-1.h b/soft-fp/op-1.h index 33682cf..3547e74 100644 --- a/soft-fp/op-1.h +++ b/soft-fp/op-1.h @@ -92,27 +92,28 @@ * normalize the data. */ -#define _FP_UNPACK_RAW_1(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs _flo; \ - _flo.flt = (val); \ - \ - X##_f = _flo.bits.frac; \ - X##_e = _flo.bits.exp; \ - X##_s = _flo.bits.sign; \ - } \ +#define _FP_UNPACK_RAW_1(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo; \ + _FP_UNPACK_RAW_1_flo.flt = (val); \ + \ + X##_f = _FP_UNPACK_RAW_1_flo.bits.frac; \ + X##_e = _FP_UNPACK_RAW_1_flo.bits.exp; \ + X##_s = _FP_UNPACK_RAW_1_flo.bits.sign; \ + } \ while (0) -#define _FP_UNPACK_RAW_1_P(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - X##_f = _flo->bits.frac; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ - } \ +#define _FP_UNPACK_RAW_1_P(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac; \ + X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp; \ + X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign; \ + } \ while (0) /* @@ -122,25 +123,26 @@ #define _FP_PACK_RAW_1(fs, val, X) \ do \ { \ - union _FP_UNION_##fs _flo; \ + union _FP_UNION_##fs _FP_PACK_RAW_1_flo; \ \ - _flo.bits.frac = X##_f; \ - _flo.bits.exp = X##_e; \ - _flo.bits.sign = X##_s; \ + _FP_PACK_RAW_1_flo.bits.frac = X##_f; \ + _FP_PACK_RAW_1_flo.bits.exp = X##_e; \ + _FP_PACK_RAW_1_flo.bits.sign = X##_s; \ \ - (val) = _flo.flt; \ + (val) = _FP_PACK_RAW_1_flo.flt; \ } \ while (0) -#define _FP_PACK_RAW_1_P(fs, val, X) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - _flo->bits.frac = X##_f; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } \ +#define _FP_PACK_RAW_1_P(fs, val, X) \ + do \ + { \ + union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + _FP_PACK_RAW_1_P_flo->bits.frac = X##_f; \ + _FP_PACK_RAW_1_P_flo->bits.exp = X##_e; \ + _FP_PACK_RAW_1_P_flo->bits.sign = X##_s; \ + } \ while (0) @@ -181,13 +183,14 @@ #define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_2 (_Z); \ - _FP_MUL_MEAT_DW_1_wide (wfracbits, _Z, X, Y, doit); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_Z); \ + _FP_MUL_MEAT_DW_1_wide (wfracbits, _FP_MUL_MEAT_1_wide_Z, \ + X, Y, doit); \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_2 (_Z, wfracbits-1, 2*wfracbits); \ - R##_f = _Z_f0; \ + _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_Z, wfracbits-1, 2*wfracbits); \ + R##_f = _FP_MUL_MEAT_1_wide_Z_f0; \ } \ while (0) @@ -196,40 +199,49 @@ #define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \ do \ { \ - _FP_W_TYPE _xh, _xl, _yh, _yl; \ - _FP_FRAC_DECL_2 (_a); \ + _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xh, _FP_MUL_MEAT_DW_1_hard_xl; \ + _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yh, _FP_MUL_MEAT_DW_1_hard_yl; \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_a); \ \ /* split the words in half */ \ - _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ - _xl = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ - _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ - _yl = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + _FP_MUL_MEAT_DW_1_hard_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ + _FP_MUL_MEAT_DW_1_hard_xl \ + = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + _FP_MUL_MEAT_DW_1_hard_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ + _FP_MUL_MEAT_DW_1_hard_yl \ + = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ \ /* multiply the pieces */ \ - R##_f0 = _xl * _yl; \ - _a_f0 = _xh * _yl; \ - _a_f1 = _xl * _yh; \ - R##_f1 = _xh * _yh; \ + R##_f0 = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yl; \ + _FP_MUL_MEAT_DW_1_hard_a_f0 \ + = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yl; \ + _FP_MUL_MEAT_DW_1_hard_a_f1 \ + = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yh; \ + R##_f1 = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yh; \ \ /* reassemble into two full words */ \ - if ((_a_f0 += _a_f1) < _a_f1) \ + if ((_FP_MUL_MEAT_DW_1_hard_a_f0 += _FP_MUL_MEAT_DW_1_hard_a_f1) \ + < _FP_MUL_MEAT_DW_1_hard_a_f1) \ R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2); \ - _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ - _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ - _FP_FRAC_ADD_2 (R, R, _a); \ + _FP_MUL_MEAT_DW_1_hard_a_f1 \ + = _FP_MUL_MEAT_DW_1_hard_a_f0 >> (_FP_W_TYPE_SIZE/2); \ + _FP_MUL_MEAT_DW_1_hard_a_f0 \ + = _FP_MUL_MEAT_DW_1_hard_a_f0 << (_FP_W_TYPE_SIZE/2); \ + _FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_a); \ } \ while (0) -#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \ - do \ - { \ - _FP_FRAC_DECL_2 (_z); \ - _FP_MUL_MEAT_DW_1_hard (wfracbits, _z, X, Y); \ - \ - /* normalize */ \ - _FP_FRAC_SRS_2 (_z, wfracbits - 1, 2*wfracbits); \ - R##_f = _z_f0; \ - } \ +#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \ + do \ + { \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_z); \ + _FP_MUL_MEAT_DW_1_hard (wfracbits, _FP_MUL_MEAT_1_hard_z, X, Y); \ + \ + /* normalize */ \ + _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_z, \ + wfracbits - 1, 2*wfracbits); \ + R##_f = _FP_MUL_MEAT_1_hard_z_f0; \ + } \ while (0) @@ -242,16 +254,16 @@ C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you choose will depend on what the compiler does with divrem4. */ -#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ - do \ - { \ - _FP_W_TYPE _q, _r; \ - X##_f <<= (X##_f < Y##_f \ - ? R##_e--, _FP_WFRACBITS_##fs \ - : _FP_WFRACBITS_##fs - 1); \ - doit (_q, _r, X##_f, Y##_f); \ - R##_f = _q | (_r != 0); \ - } \ +#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ + do \ + { \ + _FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r; \ + X##_f <<= (X##_f < Y##_f \ + ? R##_e--, _FP_WFRACBITS_##fs \ + : _FP_WFRACBITS_##fs - 1); \ + doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f); \ + R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0); \ + } \ while (0) /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd @@ -262,47 +274,59 @@ #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ do \ { \ - _FP_W_TYPE _nh, _nl, _q, _r, _y; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nl; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_r; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y; \ \ /* Normalize Y -- i.e. make the most significant bit set. */ \ - _y = Y##_f << _FP_WFRACXBITS_##fs; \ + _FP_DIV_MEAT_1_udiv_norm_y = Y##_f << _FP_WFRACXBITS_##fs; \ \ /* Shift X op correspondingly high, that is, up one full word. */ \ if (X##_f < Y##_f) \ { \ R##_e--; \ - _nl = 0; \ - _nh = X##_f; \ + _FP_DIV_MEAT_1_udiv_norm_nl = 0; \ + _FP_DIV_MEAT_1_udiv_norm_nh = X##_f; \ } \ else \ { \ - _nl = X##_f << (_FP_W_TYPE_SIZE - 1); \ - _nh = X##_f >> 1; \ + _FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \ + _FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1; \ } \ \ - udiv_qrnnd (_q, _r, _nh, _nl, _y); \ - R##_f = _q | (_r != 0); \ + udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q, \ + _FP_DIV_MEAT_1_udiv_norm_r, \ + _FP_DIV_MEAT_1_udiv_norm_nh, \ + _FP_DIV_MEAT_1_udiv_norm_nl, \ + _FP_DIV_MEAT_1_udiv_norm_y); \ + R##_f = (_FP_DIV_MEAT_1_udiv_norm_q \ + | (_FP_DIV_MEAT_1_udiv_norm_r != 0)); \ } \ while (0) -#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ - do \ - { \ - _FP_W_TYPE _nh, _nl, _q, _r; \ - if (X##_f < Y##_f) \ - { \ - R##_e--; \ - _nl = X##_f << _FP_WFRACBITS_##fs; \ - _nh = X##_f >> _FP_WFRACXBITS_##fs; \ - } \ - else \ - { \ - _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ - _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ - } \ - udiv_qrnnd (_q, _r, _nh, _nl, Y##_f); \ - R##_f = _q | (_r != 0); \ - } \ +#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ + do \ + { \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl; \ + _FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r; \ + if (X##_f < Y##_f) \ + { \ + R##_e--; \ + _FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs; \ + _FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs; \ + } \ + else \ + { \ + _FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ + _FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ + } \ + udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r, \ + _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl, \ + Y##_f); \ + R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0); \ + } \ while (0) diff --git a/soft-fp/op-2.h b/soft-fp/op-2.h index 160990f..4ea2a00 100644 --- a/soft-fp/op-2.h +++ b/soft-fp/op-2.h @@ -182,12 +182,12 @@ (rh = xh - yh - ((rl = xl - yl) > xl)) # endif # ifndef __FP_FRAC_DEC_2 -# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ - do \ - { \ - UWtype _t = xl; \ - xh -= yh + ((xl -= yl) > _t); \ - } \ +# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ + do \ + { \ + UWtype __FP_FRAC_DEC_2_t = xl; \ + xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \ + } \ while (0) # endif @@ -210,29 +210,30 @@ * normalize the data. */ -#define _FP_UNPACK_RAW_2(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs _flo; \ - _flo.flt = (val); \ - \ - X##_f0 = _flo.bits.frac0; \ - X##_f1 = _flo.bits.frac1; \ - X##_e = _flo.bits.exp; \ - X##_s = _flo.bits.sign; \ - } \ +#define _FP_UNPACK_RAW_2(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \ + _FP_UNPACK_RAW_2_flo.flt = (val); \ + \ + X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \ + X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \ + X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \ + X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \ + } \ while (0) -#define _FP_UNPACK_RAW_2_P(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - X##_f0 = _flo->bits.frac0; \ - X##_f1 = _flo->bits.frac1; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ - } \ +#define _FP_UNPACK_RAW_2_P(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \ + X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \ + X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \ + X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \ + } \ while (0) @@ -243,27 +244,28 @@ #define _FP_PACK_RAW_2(fs, val, X) \ do \ { \ - union _FP_UNION_##fs _flo; \ + union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \ \ - _flo.bits.frac0 = X##_f0; \ - _flo.bits.frac1 = X##_f1; \ - _flo.bits.exp = X##_e; \ - _flo.bits.sign = X##_s; \ + _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \ + _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \ + _FP_PACK_RAW_2_flo.bits.exp = X##_e; \ + _FP_PACK_RAW_2_flo.bits.sign = X##_s; \ \ - (val) = _flo.flt; \ + (val) = _FP_PACK_RAW_2_flo.flt; \ } \ while (0) -#define _FP_PACK_RAW_2_P(fs, val, X) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - _flo->bits.frac0 = X##_f0; \ - _flo->bits.frac1 = X##_f1; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } \ +#define _FP_PACK_RAW_2_P(fs, val, X) \ + do \ + { \ + union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \ + _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \ + _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \ + _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \ + } \ while (0) @@ -276,20 +278,28 @@ #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_2 (_b); \ - _FP_FRAC_DECL_2 (_c); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \ \ - doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \ - doit (_b_f1, _b_f0, X##_f0, Y##_f1); \ - doit (_c_f1, _c_f0, X##_f1, Y##_f0); \ - doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \ + doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \ + X##_f0, Y##_f0); \ + doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \ + X##_f0, Y##_f1); \ + doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \ + X##_f1, Y##_f0); \ + doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ + X##_f1, Y##_f1); \ \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0, \ + _FP_FRAC_WORD_4 (R, 1), 0, \ + _FP_MUL_MEAT_DW_2_wide_b_f1, \ + _FP_MUL_MEAT_DW_2_wide_b_f0, \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ _FP_FRAC_WORD_4 (R, 1)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0, \ + _FP_FRAC_WORD_4 (R, 1), 0, \ + _FP_MUL_MEAT_DW_2_wide_c_f1, \ + _FP_MUL_MEAT_DW_2_wide_c_f0, \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ _FP_FRAC_WORD_4 (R, 1)); \ } \ @@ -298,16 +308,17 @@ #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_4 (_z); \ + _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \ \ - _FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit); \ + _FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z, \ + X, Y, doit); \ \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ - R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \ - R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \ + _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \ + R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \ + R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \ } \ while (0) @@ -318,35 +329,51 @@ #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_2 (_b); \ - _FP_FRAC_DECL_2 (_c); \ - _FP_W_TYPE _d; \ - int _c1, _c2; \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \ + _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \ + int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \ + int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \ \ - _b_f0 = X##_f0 + X##_f1; \ - _c1 = _b_f0 < X##_f0; \ - _b_f1 = Y##_f0 + Y##_f1; \ - _c2 = _b_f1 < Y##_f0; \ - doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \ - doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \ - doit (_c_f1, _c_f0, X##_f1, Y##_f1); \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \ + _FP_MUL_MEAT_DW_2_wide_3mul_c1 \ + = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \ + _FP_MUL_MEAT_DW_2_wide_3mul_c2 \ + = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \ + doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \ + X##_f0, Y##_f0); \ + doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \ + doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \ + _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \ \ - _b_f0 &= -_c2; \ - _b_f1 &= -_c1; \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \ + &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \ + &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d, \ + _FP_FRAC_WORD_4 (R, 1), \ + (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \ + & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \ + _FP_MUL_MEAT_DW_2_wide_3mul_d, \ 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \ __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _b_f0); \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \ __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _b_f1); \ + _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \ __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ _FP_FRAC_WORD_4 (R, 1), \ - 0, _d, _FP_FRAC_WORD_4 (R, 0)); \ + 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \ + _FP_FRAC_WORD_4 (R, 0)); \ __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0); \ + _FP_FRAC_WORD_4 (R, 1), 0, \ + _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \ + _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \ __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ - _c_f1, _c_f0, \ + _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \ + _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \ } \ while (0) @@ -354,45 +381,50 @@ #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_4 (_z); \ + _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \ \ - _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit); \ + _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, \ + _FP_MUL_MEAT_2_wide_3mul_z, \ + X, Y, doit); \ \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ - R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \ - R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \ + _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \ + wfracbits-1, 2*wfracbits); \ + R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \ + R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \ } \ while (0) #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \ do \ { \ - _FP_W_TYPE _x[2], _y[2]; \ - _x[0] = X##_f0; \ - _x[1] = X##_f1; \ - _y[0] = Y##_f0; \ - _y[1] = Y##_f1; \ + _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \ + _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \ + _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \ + _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \ + _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \ + _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \ \ - mpn_mul_n (R##_f, _x, _y, 2); \ + mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \ + _FP_MUL_MEAT_DW_2_gmp_y, 2); \ } \ while (0) #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ do \ { \ - _FP_FRAC_DECL_4 (_z); \ + _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \ \ - _FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y); \ + _FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y); \ \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ - R##_f0 = _z_f[0]; \ - R##_f1 = _z_f[1]; \ + _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits); \ + R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \ + R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \ } \ while (0) @@ -507,67 +539,91 @@ #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ do \ { \ - _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \ + _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \ if (_FP_FRAC_GE_2 (X, Y)) \ { \ - _n_f2 = X##_f1 >> 1; \ - _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ - _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ + _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \ + _FP_DIV_MEAT_2_udiv_n_f1 \ + = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ + _FP_DIV_MEAT_2_udiv_n_f0 \ + = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ } \ else \ { \ R##_e--; \ - _n_f2 = X##_f1; \ - _n_f1 = X##_f0; \ - _n_f0 = 0; \ + _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \ + _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \ + _FP_DIV_MEAT_2_udiv_n_f0 = 0; \ } \ \ /* Normalize, i.e. make the most significant bit of the \ denominator set. */ \ _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \ \ - udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ - umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0); \ - _r_f0 = _n_f0; \ - if (_FP_FRAC_GT_2 (_m, _r)) \ + udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \ + _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \ + Y##_f1); \ + umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \ + R##_f1, Y##_f0); \ + _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \ + if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \ { \ R##_f1--; \ - _FP_FRAC_ADD_2 (_r, Y, _r); \ - if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \ + _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \ + _FP_DIV_MEAT_2_udiv_r); \ + if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \ + && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \ + _FP_DIV_MEAT_2_udiv_r)) \ { \ R##_f1--; \ - _FP_FRAC_ADD_2 (_r, Y, _r); \ + _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \ + _FP_DIV_MEAT_2_udiv_r); \ } \ } \ - _FP_FRAC_DEC_2 (_r, _m); \ + _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \ \ - if (_r_f1 == Y##_f1) \ + if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \ { \ /* This is a special case, not an optimization \ - (_r/Y##_f1 would not fit into UWtype). \ - As _r is guaranteed to be < Y, R##_f0 can be either \ - (UWtype)-1 or (UWtype)-2. But as we know what kind \ - of bits it is (sticky, guard, round), we don't care. \ - We also don't care what the reminder is, because the \ - guard bit will be set anyway. -jj */ \ + (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \ + As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \ + R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \ + know what kind of bits it is (sticky, guard, round), \ + we don't care. We also don't care what the reminder is, \ + because the guard bit will be set anyway. -jj */ \ R##_f0 = -1; \ } \ else \ { \ - udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ - umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0); \ - _r_f0 = 0; \ - if (_FP_FRAC_GT_2 (_m, _r)) \ + udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \ + _FP_DIV_MEAT_2_udiv_r_f1, \ + _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \ + umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \ + _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \ + _FP_DIV_MEAT_2_udiv_r_f0 = 0; \ + if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \ + _FP_DIV_MEAT_2_udiv_r)) \ { \ R##_f0--; \ - _FP_FRAC_ADD_2 (_r, Y, _r); \ - if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \ + _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \ + _FP_DIV_MEAT_2_udiv_r); \ + if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \ + && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \ + _FP_DIV_MEAT_2_udiv_r)) \ { \ R##_f0--; \ - _FP_FRAC_ADD_2 (_r, Y, _r); \ + _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \ + _FP_DIV_MEAT_2_udiv_r); \ } \ } \ - if (!_FP_FRAC_EQ_2 (_r, _m)) \ + if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \ + _FP_DIV_MEAT_2_udiv_m)) \ R##_f0 |= _FP_WORK_STICKY; \ } \ } \ diff --git a/soft-fp/op-4.h b/soft-fp/op-4.h index a80bdb2..3acf96c 100644 --- a/soft-fp/op-4.h +++ b/soft-fp/op-4.h @@ -39,49 +39,67 @@ #define _FP_FRAC_LOW_4(X) (X##_f[0]) #define _FP_FRAC_WORD_4(X, w) (X##_f[w]) -#define _FP_FRAC_SLL_4(X, N) \ - do \ - { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _up = (N) % _FP_W_TYPE_SIZE; \ - _down = _FP_W_TYPE_SIZE - _up; \ - if (!_up) \ - for (_i = 3; _i >= _skip; --_i) \ - X##_f[_i] = X##_f[_i-_skip]; \ - else \ - { \ - for (_i = 3; _i > _skip; --_i) \ - X##_f[_i] = (X##_f[_i-_skip] << _up \ - | X##_f[_i-_skip-1] >> _down); \ - X##_f[_i--] = X##_f[0] << _up; \ - } \ - for (; _i >= 0; --_i) \ - X##_f[_i] = 0; \ - } \ +#define _FP_FRAC_SLL_4(X, N) \ + do \ + { \ + _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \ + _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \ + _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \ + if (!_FP_FRAC_SLL_4_up) \ + for (_FP_FRAC_SLL_4_i = 3; \ + _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \ + --_FP_FRAC_SLL_4_i) \ + X##_f[_FP_FRAC_SLL_4_i] \ + = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \ + else \ + { \ + for (_FP_FRAC_SLL_4_i = 3; \ + _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \ + --_FP_FRAC_SLL_4_i) \ + X##_f[_FP_FRAC_SLL_4_i] \ + = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \ + << _FP_FRAC_SLL_4_up) \ + | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \ + >> _FP_FRAC_SLL_4_down)); \ + X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \ + } \ + for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \ + X##_f[_FP_FRAC_SLL_4_i] = 0; \ + } \ while (0) /* This one was broken too */ -#define _FP_FRAC_SRL_4(X, N) \ - do \ - { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _down = (N) % _FP_W_TYPE_SIZE; \ - _up = _FP_W_TYPE_SIZE - _down; \ - if (!_down) \ - for (_i = 0; _i <= 3-_skip; ++_i) \ - X##_f[_i] = X##_f[_i+_skip]; \ - else \ - { \ - for (_i = 0; _i < 3-_skip; ++_i) \ - X##_f[_i] = (X##_f[_i+_skip] >> _down \ - | X##_f[_i+_skip+1] << _up); \ - X##_f[_i++] = X##_f[3] >> _down; \ - } \ - for (; _i < 4; ++_i) \ - X##_f[_i] = 0; \ - } \ +#define _FP_FRAC_SRL_4(X, N) \ + do \ + { \ + _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \ + _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \ + _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \ + if (!_FP_FRAC_SRL_4_down) \ + for (_FP_FRAC_SRL_4_i = 0; \ + _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \ + ++_FP_FRAC_SRL_4_i) \ + X##_f[_FP_FRAC_SRL_4_i] \ + = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \ + else \ + { \ + for (_FP_FRAC_SRL_4_i = 0; \ + _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \ + ++_FP_FRAC_SRL_4_i) \ + X##_f[_FP_FRAC_SRL_4_i] \ + = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \ + >> _FP_FRAC_SRL_4_down) \ + | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \ + << _FP_FRAC_SRL_4_up)); \ + X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \ + } \ + for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \ + X##_f[_FP_FRAC_SRL_4_i] = 0; \ + } \ while (0) @@ -90,40 +108,53 @@ * but that if any of the bits that fall off the right hand side * were one then we always set the LSbit. */ -#define _FP_FRAC_SRST_4(X, S, N, size) \ - do \ - { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _FP_W_TYPE _s; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _down = (N) % _FP_W_TYPE_SIZE; \ - _up = _FP_W_TYPE_SIZE - _down; \ - for (_s = _i = 0; _i < _skip; ++_i) \ - _s |= X##_f[_i]; \ - if (!_down) \ - for (_i = 0; _i <= 3-_skip; ++_i) \ - X##_f[_i] = X##_f[_i+_skip]; \ - else \ - { \ - _s |= X##_f[_i] << _up; \ - for (_i = 0; _i < 3-_skip; ++_i) \ - X##_f[_i] = (X##_f[_i+_skip] >> _down \ - | X##_f[_i+_skip+1] << _up); \ - X##_f[_i++] = X##_f[3] >> _down; \ - } \ - for (; _i < 4; ++_i) \ - X##_f[_i] = 0; \ - S = (_s != 0); \ - } \ +#define _FP_FRAC_SRST_4(X, S, N, size) \ + do \ + { \ + _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \ + _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \ + _FP_W_TYPE _FP_FRAC_SRST_4_s; \ + _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \ + for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \ + _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \ + ++_FP_FRAC_SRST_4_i) \ + _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \ + if (!_FP_FRAC_SRST_4_down) \ + for (_FP_FRAC_SRST_4_i = 0; \ + _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \ + ++_FP_FRAC_SRST_4_i) \ + X##_f[_FP_FRAC_SRST_4_i] \ + = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \ + else \ + { \ + _FP_FRAC_SRST_4_s \ + |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \ + for (_FP_FRAC_SRST_4_i = 0; \ + _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \ + ++_FP_FRAC_SRST_4_i) \ + X##_f[_FP_FRAC_SRST_4_i] \ + = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \ + >> _FP_FRAC_SRST_4_down) \ + | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \ + << _FP_FRAC_SRST_4_up)); \ + X##_f[_FP_FRAC_SRST_4_i++] \ + = X##_f[3] >> _FP_FRAC_SRST_4_down; \ + } \ + for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \ + X##_f[_FP_FRAC_SRST_4_i] = 0; \ + S = (_FP_FRAC_SRST_4_s != 0); \ + } \ while (0) -#define _FP_FRAC_SRS_4(X, N, size) \ - do \ - { \ - int _sticky; \ - _FP_FRAC_SRST_4 (X, _sticky, N, size); \ - X##_f[0] |= _sticky; \ - } \ +#define _FP_FRAC_SRS_4(X, N, size) \ + do \ + { \ + int _FP_FRAC_SRS_4_sticky; \ + _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, N, size); \ + X##_f[0] |= _FP_FRAC_SRS_4_sticky; \ + } \ while (0) #define _FP_FRAC_ADD_4(R, X, Y) \ @@ -201,60 +232,62 @@ while (0) -#define _FP_UNPACK_RAW_4(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs _flo; \ - _flo.flt = (val); \ - X##_f[0] = _flo.bits.frac0; \ - X##_f[1] = _flo.bits.frac1; \ - X##_f[2] = _flo.bits.frac2; \ - X##_f[3] = _flo.bits.frac3; \ - X##_e = _flo.bits.exp; \ - X##_s = _flo.bits.sign; \ - } \ +#define _FP_UNPACK_RAW_4(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \ + _FP_UNPACK_RAW_4_flo.flt = (val); \ + X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \ + X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \ + X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \ + X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \ + X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \ + X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \ + } \ while (0) -#define _FP_UNPACK_RAW_4_P(fs, X, val) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - X##_f[0] = _flo->bits.frac0; \ - X##_f[1] = _flo->bits.frac1; \ - X##_f[2] = _flo->bits.frac2; \ - X##_f[3] = _flo->bits.frac3; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ - } \ +#define _FP_UNPACK_RAW_4_P(fs, X, val) \ + do \ + { \ + union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \ + X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \ + X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \ + X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \ + X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \ + X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \ + } \ while (0) #define _FP_PACK_RAW_4(fs, val, X) \ do \ { \ - union _FP_UNION_##fs _flo; \ - _flo.bits.frac0 = X##_f[0]; \ - _flo.bits.frac1 = X##_f[1]; \ - _flo.bits.frac2 = X##_f[2]; \ - _flo.bits.frac3 = X##_f[3]; \ - _flo.bits.exp = X##_e; \ - _flo.bits.sign = X##_s; \ - (val) = _flo.flt; \ + union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \ + _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \ + _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \ + _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \ + _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \ + _FP_PACK_RAW_4_flo.bits.exp = X##_e; \ + _FP_PACK_RAW_4_flo.bits.sign = X##_s; \ + (val) = _FP_PACK_RAW_4_flo.flt; \ } \ while (0) -#define _FP_PACK_RAW_4_P(fs, val, X) \ - do \ - { \ - union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ - \ - _flo->bits.frac0 = X##_f[0]; \ - _flo->bits.frac1 = X##_f[1]; \ - _flo->bits.frac2 = X##_f[2]; \ - _flo->bits.frac3 = X##_f[3]; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } \ +#define _FP_PACK_RAW_4_P(fs, val, X) \ + do \ + { \ + union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \ + = (union _FP_UNION_##fs *) (val); \ + \ + _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \ + _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \ + _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \ + _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \ + _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \ + _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \ + } \ while (0) /* @@ -266,81 +299,126 @@ #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_2 (_b); \ - _FP_FRAC_DECL_2 (_c); \ - _FP_FRAC_DECL_2 (_d); \ - _FP_FRAC_DECL_2 (_e); \ - _FP_FRAC_DECL_2 (_f); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \ + _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \ \ - doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), X##_f[0], Y##_f[0]); \ - doit (_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ - doit (_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ - doit (_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ - doit (_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ - doit (_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ + doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \ + X##_f[0], Y##_f[0]); \ + doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \ + X##_f[0], Y##_f[1]); \ + doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \ + X##_f[1], Y##_f[0]); \ + doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \ + X##_f[1], Y##_f[1]); \ + doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \ + X##_f[0], Y##_f[2]); \ + doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \ + X##_f[2], Y##_f[0]); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ - _FP_FRAC_WORD_8 (R, 1), 0, _b_f1, _b_f0, \ + _FP_FRAC_WORD_8 (R, 1), 0, \ + _FP_MUL_MEAT_DW_4_wide_b_f1, \ + _FP_MUL_MEAT_DW_4_wide_b_f0, \ 0, 0, _FP_FRAC_WORD_8 (R, 1)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ - _FP_FRAC_WORD_8 (R, 1), 0, _c_f1, _c_f0, \ + _FP_FRAC_WORD_8 (R, 1), 0, \ + _FP_MUL_MEAT_DW_4_wide_c_f1, \ + _FP_MUL_MEAT_DW_4_wide_c_f0, \ _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ _FP_FRAC_WORD_8 (R, 1)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ - _FP_FRAC_WORD_8 (R, 2), 0, _d_f1, _d_f0, \ + _FP_FRAC_WORD_8 (R, 2), 0, \ + _FP_MUL_MEAT_DW_4_wide_d_f1, \ + _FP_MUL_MEAT_DW_4_wide_d_f0, \ 0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ - _FP_FRAC_WORD_8 (R, 2), 0, _e_f1, _e_f0, \ + _FP_FRAC_WORD_8 (R, 2), 0, \ + _FP_MUL_MEAT_DW_4_wide_e_f1, \ + _FP_MUL_MEAT_DW_4_wide_e_f0, \ _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ _FP_FRAC_WORD_8 (R, 2)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ - _FP_FRAC_WORD_8 (R, 2), 0, _f_f1, _f_f0, \ + _FP_FRAC_WORD_8 (R, 2), 0, \ + _FP_MUL_MEAT_DW_4_wide_f_f1, \ + _FP_MUL_MEAT_DW_4_wide_f_f0, \ _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ _FP_FRAC_WORD_8 (R, 2)); \ - doit (_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ - doit (_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ - doit (_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ - doit (_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ + doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \ + _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \ + doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \ + _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \ + doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \ + X##_f[1], Y##_f[2]); \ + doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \ + X##_f[2], Y##_f[1]); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ - _FP_FRAC_WORD_8 (R, 3), 0, _b_f1, _b_f0, \ + _FP_FRAC_WORD_8 (R, 3), 0, \ + _FP_MUL_MEAT_DW_4_wide_b_f1, \ + _FP_MUL_MEAT_DW_4_wide_b_f0, \ 0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ - _FP_FRAC_WORD_8 (R, 3), 0, _c_f1, _c_f0, \ + _FP_FRAC_WORD_8 (R, 3), 0, \ + _FP_MUL_MEAT_DW_4_wide_c_f1, \ + _FP_MUL_MEAT_DW_4_wide_c_f0, \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 3)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ - _FP_FRAC_WORD_8 (R, 3), 0, _d_f1, _d_f0, \ + _FP_FRAC_WORD_8 (R, 3), 0, \ + _FP_MUL_MEAT_DW_4_wide_d_f1, \ + _FP_MUL_MEAT_DW_4_wide_d_f0, \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 3)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ - _FP_FRAC_WORD_8 (R, 3), 0, _e_f1, _e_f0, \ + _FP_FRAC_WORD_8 (R, 3), 0, \ + _FP_MUL_MEAT_DW_4_wide_e_f1, \ + _FP_MUL_MEAT_DW_4_wide_e_f0, \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 3)); \ - doit (_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ - doit (_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ - doit (_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ - doit (_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ - doit (_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ + doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \ + X##_f[2], Y##_f[2]); \ + doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \ + X##_f[1], Y##_f[3]); \ + doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \ + X##_f[3], Y##_f[1]); \ + doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \ + X##_f[2], Y##_f[3]); \ + doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \ + X##_f[3], Y##_f[2]); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ - _FP_FRAC_WORD_8 (R, 4), 0, _b_f1, _b_f0, \ + _FP_FRAC_WORD_8 (R, 4), 0, \ + _FP_MUL_MEAT_DW_4_wide_b_f1, \ + _FP_MUL_MEAT_DW_4_wide_b_f0, \ 0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ - _FP_FRAC_WORD_8 (R, 4), 0, _c_f1, _c_f0, \ + _FP_FRAC_WORD_8 (R, 4), 0, \ + _FP_MUL_MEAT_DW_4_wide_c_f1, \ + _FP_MUL_MEAT_DW_4_wide_c_f0, \ _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ _FP_FRAC_WORD_8 (R, 4)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ - _FP_FRAC_WORD_8 (R, 4), 0, _d_f1, _d_f0, \ + _FP_FRAC_WORD_8 (R, 4), 0, \ + _FP_MUL_MEAT_DW_4_wide_d_f1, \ + _FP_MUL_MEAT_DW_4_wide_d_f0, \ _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ _FP_FRAC_WORD_8 (R, 4)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ - _FP_FRAC_WORD_8 (R, 5), 0, _e_f1, _e_f0, \ + _FP_FRAC_WORD_8 (R, 5), 0, \ + _FP_MUL_MEAT_DW_4_wide_e_f1, \ + _FP_MUL_MEAT_DW_4_wide_e_f0, \ 0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ - _FP_FRAC_WORD_8 (R, 5), 0, _f_f1, _f_f0, \ + _FP_FRAC_WORD_8 (R, 5), 0, \ + _FP_MUL_MEAT_DW_4_wide_f_f1, \ + _FP_MUL_MEAT_DW_4_wide_f_f0, \ _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ _FP_FRAC_WORD_8 (R, 5)); \ - doit (_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ + doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \ + X##_f[3], Y##_f[3]); \ __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ - _b_f1, _b_f0, \ + _FP_MUL_MEAT_DW_4_wide_b_f1, \ + _FP_MUL_MEAT_DW_4_wide_b_f0, \ _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \ } \ while (0) @@ -348,16 +426,19 @@ #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ do \ { \ - _FP_FRAC_DECL_8 (_z); \ + _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \ \ - _FP_MUL_MEAT_DW_4_wide (wfracbits, _z, X, Y, doit); \ + _FP_MUL_MEAT_DW_4_wide (wfracbits, _FP_MUL_MEAT_4_wide_z, \ + X, Y, doit); \ \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \ - __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \ - _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \ + _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, wfracbits-1, 2*wfracbits); \ + __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \ } \ while (0) @@ -371,16 +452,18 @@ #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ do \ { \ - _FP_FRAC_DECL_8 (_z); \ + _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \ \ - _FP_MUL_MEAT_DW_4_gmp (wfracbits, _z, X, Y); \ + _FP_MUL_MEAT_DW_4_gmp (wfracbits, _FP_MUL_MEAT_4_gmp_z, X, Y); \ \ /* Normalize since we know where the msb of the multiplicands \ were (bit B), we know that the msb of the of the product is \ at either 2B or 2B-1. */ \ - _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \ - __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \ - _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \ + _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, wfracbits-1, 2*wfracbits); \ + __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \ + _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \ } \ while (0) @@ -391,12 +474,12 @@ #define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \ do \ { \ - UWtype _t; \ + UWtype umul_ppppmnnn_t; \ umul_ppmm (p1, p0, m, n0); \ - umul_ppmm (p2, _t, m, n1); \ - __FP_FRAC_ADDI_2 (p2, p1, _t); \ - umul_ppmm (p3, _t, m, n2); \ - __FP_FRAC_ADDI_2 (p3, p2, _t); \ + umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \ + __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \ + umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \ + __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \ } \ while (0) @@ -407,13 +490,14 @@ #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ do \ { \ - int _i; \ - _FP_FRAC_DECL_4 (_n); \ - _FP_FRAC_DECL_4 (_m); \ - _FP_FRAC_SET_4 (_n, _FP_ZEROFRAC_4); \ + int _FP_DIV_MEAT_4_udiv_i; \ + _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \ + _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \ + _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \ if (_FP_FRAC_GE_4 (X, Y)) \ { \ - _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ + _FP_DIV_MEAT_4_udiv_n_f[3] \ + = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ _FP_FRAC_SRL_4 (X, 1); \ } \ else \ @@ -423,49 +507,58 @@ denominator set. */ \ _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \ \ - for (_i = 3; ; _i--) \ + for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \ { \ if (X##_f[3] == Y##_f[3]) \ { \ /* This is a special case, not an optimization \ (X##_f[3]/Y##_f[3] would not fit into UWtype). \ - As X## is guaranteed to be < Y, R##_f[_i] can be either \ + As X## is guaranteed to be < Y, \ + R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \ (UWtype)-1 or (UWtype)-2. */ \ - R##_f[_i] = -1; \ - if (!_i) \ + R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \ + if (!_FP_DIV_MEAT_4_udiv_i) \ break; \ __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ Y##_f[2], Y##_f[1], Y##_f[0], 0, \ - X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ + X##_f[2], X##_f[1], X##_f[0], \ + _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \ _FP_FRAC_SUB_4 (X, Y, X); \ if (X##_f[3] > Y##_f[3]) \ { \ - R##_f[_i] = -2; \ + R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \ _FP_FRAC_ADD_4 (X, Y, X); \ } \ } \ else \ { \ - udiv_qrnnd (R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ - umul_ppppmnnn (_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ - R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ + udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \ + X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ + umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \ + _FP_DIV_MEAT_4_udiv_m_f[2], \ + _FP_DIV_MEAT_4_udiv_m_f[1], \ + _FP_DIV_MEAT_4_udiv_m_f[0], \ + R##_f[_FP_DIV_MEAT_4_udiv_i], \ + Y##_f[2], Y##_f[1], Y##_f[0]); \ X##_f[2] = X##_f[1]; \ X##_f[1] = X##_f[0]; \ - X##_f[0] = _n_f[_i]; \ - if (_FP_FRAC_GT_4 (_m, X)) \ + X##_f[0] \ + = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \ + if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \ { \ - R##_f[_i]--; \ + R##_f[_FP_DIV_MEAT_4_udiv_i]--; \ _FP_FRAC_ADD_4 (X, Y, X); \ - if (_FP_FRAC_GE_4 (X, Y) && _FP_FRAC_GT_4 (_m, X)) \ + if (_FP_FRAC_GE_4 (X, Y) \ + && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \ { \ - R##_f[_i]--; \ + R##_f[_FP_DIV_MEAT_4_udiv_i]--; \ _FP_FRAC_ADD_4 (X, Y, X); \ } \ } \ - _FP_FRAC_DEC_4 (X, _m); \ - if (!_i) \ + _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \ + if (!_FP_DIV_MEAT_4_udiv_i) \ { \ - if (!_FP_FRAC_EQ_4 (X, _m)) \ + if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \ R##_f[0] |= _FP_WORK_STICKY; \ break; \ } \ @@ -591,18 +684,19 @@ # define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \ do \ { \ - _FP_W_TYPE _c1, _c2, _c3; \ + _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \ + _FP_W_TYPE __FP_FRAC_ADD_4_c3; \ r0 = x0 + y0; \ - _c1 = r0 < x0; \ + __FP_FRAC_ADD_4_c1 = r0 < x0; \ r1 = x1 + y1; \ - _c2 = r1 < x1; \ - r1 += _c1; \ - _c2 |= r1 < _c1; \ + __FP_FRAC_ADD_4_c2 = r1 < x1; \ + r1 += __FP_FRAC_ADD_4_c1; \ + __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \ r2 = x2 + y2; \ - _c3 = r2 < x2; \ - r2 += _c2; \ - _c3 |= r2 < _c2; \ - r3 = x3 + y3 + _c3; \ + __FP_FRAC_ADD_4_c3 = r2 < x2; \ + r2 += __FP_FRAC_ADD_4_c2; \ + __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \ + r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \ } \ while (0) #endif @@ -611,14 +705,14 @@ # define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \ do \ { \ - _FP_W_TYPE _c1, _c2; \ + _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \ r0 = x0 - y0; \ - _c1 = r0 > x0; \ + __FP_FRAC_SUB_3_c1 = r0 > x0; \ r1 = x1 - y1; \ - _c2 = r1 > x1; \ - r1 -= _c1; \ - _c2 |= _c1 && (y1 == x1); \ - r2 = x2 - y2 - _c2; \ + __FP_FRAC_SUB_3_c2 = r1 > x1; \ + r1 -= __FP_FRAC_SUB_3_c1; \ + __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \ + r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \ } \ while (0) #endif @@ -627,18 +721,19 @@ # define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \ do \ { \ - _FP_W_TYPE _c1, _c2, _c3; \ + _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \ + _FP_W_TYPE __FP_FRAC_SUB_4_c3; \ r0 = x0 - y0; \ - _c1 = r0 > x0; \ + __FP_FRAC_SUB_4_c1 = r0 > x0; \ r1 = x1 - y1; \ - _c2 = r1 > x1; \ - r1 -= _c1; \ - _c2 |= _c1 && (y1 == x1); \ + __FP_FRAC_SUB_4_c2 = r1 > x1; \ + r1 -= __FP_FRAC_SUB_4_c1; \ + __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \ r2 = x2 - y2; \ - _c3 = r2 > x2; \ - r2 -= _c2; \ - _c3 |= _c2 && (y2 == x2); \ - r3 = x3 - y3 - _c3; \ + __FP_FRAC_SUB_4_c3 = r2 > x2; \ + r2 -= __FP_FRAC_SUB_4_c2; \ + __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \ + r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \ } \ while (0) #endif @@ -647,36 +742,47 @@ # define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \ do \ { \ - UWtype _t0, _t1, _t2; \ - _t0 = x0, _t1 = x1, _t2 = x2; \ - __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ + UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \ + UWtype __FP_FRAC_DEC_3_t2; \ + __FP_FRAC_DEC_3_t0 = x0; \ + __FP_FRAC_DEC_3_t1 = x1; \ + __FP_FRAC_DEC_3_t2 = x2; \ + __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \ + __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \ + y2, y1, y0); \ } \ while (0) #endif #ifndef __FP_FRAC_DEC_4 -# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \ - do \ - { \ - UWtype _t0, _t1, _t2, _t3; \ - _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ - __FP_FRAC_SUB_4 (x3, x2, x1, x0, _t3, _t2, _t1, _t0, y3, y2, y1, y0); \ - } \ +# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \ + do \ + { \ + UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \ + UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \ + __FP_FRAC_DEC_4_t0 = x0; \ + __FP_FRAC_DEC_4_t1 = x1; \ + __FP_FRAC_DEC_4_t2 = x2; \ + __FP_FRAC_DEC_4_t3 = x3; \ + __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \ + __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \ + __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \ + } \ while (0) #endif #ifndef __FP_FRAC_ADDI_4 -# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \ - do \ - { \ - UWtype _t; \ - _t = ((x0 += i) < i); \ - x1 += _t; \ - _t = (x1 < _t); \ - x2 += _t; \ - _t = (x2 < _t); \ - x3 += _t; \ - } \ +# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \ + do \ + { \ + UWtype __FP_FRAC_ADDI_4_t; \ + __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \ + x1 += __FP_FRAC_ADDI_4_t; \ + __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \ + x2 += __FP_FRAC_ADDI_4_t; \ + __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \ + x3 += __FP_FRAC_ADDI_4_t; \ + } \ while (0) #endif diff --git a/soft-fp/op-8.h b/soft-fp/op-8.h index a50cd7b..c966ee2 100644 --- a/soft-fp/op-8.h +++ b/soft-fp/op-8.h @@ -36,48 +36,66 @@ #define _FP_FRAC_LOW_8(X) (X##_f[0]) #define _FP_FRAC_WORD_8(X, w) (X##_f[w]) -#define _FP_FRAC_SLL_8(X, N) \ - do \ - { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _up = (N) % _FP_W_TYPE_SIZE; \ - _down = _FP_W_TYPE_SIZE - _up; \ - if (!_up) \ - for (_i = 7; _i >= _skip; --_i) \ - X##_f[_i] = X##_f[_i-_skip]; \ - else \ - { \ - for (_i = 7; _i > _skip; --_i) \ - X##_f[_i] = (X##_f[_i-_skip] << _up \ - | X##_f[_i-_skip-1] >> _down); \ - X##_f[_i--] = X##_f[0] << _up; \ - } \ - for (; _i >= 0; --_i) \ - X##_f[_i] = 0; \ - } \ +#define _FP_FRAC_SLL_8(X, N) \ + do \ + { \ + _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down; \ + _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i; \ + _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up; \ + if (!_FP_FRAC_SLL_8_up) \ + for (_FP_FRAC_SLL_8_i = 7; \ + _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip; \ + --_FP_FRAC_SLL_8_i) \ + X##_f[_FP_FRAC_SLL_8_i] \ + = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]; \ + else \ + { \ + for (_FP_FRAC_SLL_8_i = 7; \ + _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip; \ + --_FP_FRAC_SLL_8_i) \ + X##_f[_FP_FRAC_SLL_8_i] \ + = ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip] \ + << _FP_FRAC_SLL_8_up) \ + | (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1] \ + >> _FP_FRAC_SLL_8_down)); \ + X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up; \ + } \ + for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i) \ + X##_f[_FP_FRAC_SLL_8_i] = 0; \ + } \ while (0) -#define _FP_FRAC_SRL_8(X, N) \ - do \ - { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _down = (N) % _FP_W_TYPE_SIZE; \ - _up = _FP_W_TYPE_SIZE - _down; \ - if (!_down) \ - for (_i = 0; _i <= 7-_skip; ++_i) \ - X##_f[_i] = X##_f[_i+_skip]; \ - else \ - { \ - for (_i = 0; _i < 7-_skip; ++_i) \ - X##_f[_i] = (X##_f[_i+_skip] >> _down \ - | X##_f[_i+_skip+1] << _up); \ - X##_f[_i++] = X##_f[7] >> _down; \ - } \ - for (; _i < 8; ++_i) \ - X##_f[_i] = 0; \ - } \ +#define _FP_FRAC_SRL_8(X, N) \ + do \ + { \ + _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down; \ + _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i; \ + _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down; \ + if (!_FP_FRAC_SRL_8_down) \ + for (_FP_FRAC_SRL_8_i = 0; \ + _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip; \ + ++_FP_FRAC_SRL_8_i) \ + X##_f[_FP_FRAC_SRL_8_i] \ + = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]; \ + else \ + { \ + for (_FP_FRAC_SRL_8_i = 0; \ + _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip; \ + ++_FP_FRAC_SRL_8_i) \ + X##_f[_FP_FRAC_SRL_8_i] \ + = ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip] \ + >> _FP_FRAC_SRL_8_down) \ + | (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1] \ + << _FP_FRAC_SRL_8_up)); \ + X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down; \ + } \ + for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i) \ + X##_f[_FP_FRAC_SRL_8_i] = 0; \ + } \ while (0) @@ -89,28 +107,40 @@ #define _FP_FRAC_SRS_8(X, N, size) \ do \ { \ - _FP_I_TYPE _up, _down, _skip, _i; \ - _FP_W_TYPE _s; \ - _skip = (N) / _FP_W_TYPE_SIZE; \ - _down = (N) % _FP_W_TYPE_SIZE; \ - _up = _FP_W_TYPE_SIZE - _down; \ - for (_s = _i = 0; _i < _skip; ++_i) \ - _s |= X##_f[_i]; \ - if (!_down) \ - for (_i = 0; _i <= 7-_skip; ++_i) \ - X##_f[_i] = X##_f[_i+_skip]; \ + _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down; \ + _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i; \ + _FP_W_TYPE _FP_FRAC_SRS_8_s; \ + _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE; \ + _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down; \ + for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0; \ + _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip; \ + ++_FP_FRAC_SRS_8_i) \ + _FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i]; \ + if (!_FP_FRAC_SRS_8_down) \ + for (_FP_FRAC_SRS_8_i = 0; \ + _FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip; \ + ++_FP_FRAC_SRS_8_i) \ + X##_f[_FP_FRAC_SRS_8_i] \ + = X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]; \ else \ { \ - _s |= X##_f[_i] << _up; \ - for (_i = 0; _i < 7-_skip; ++_i) \ - X##_f[_i] = (X##_f[_i+_skip] >> _down \ - | X##_f[_i+_skip+1] << _up); \ - X##_f[_i++] = X##_f[7] >> _down; \ + _FP_FRAC_SRS_8_s \ + |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up; \ + for (_FP_FRAC_SRS_8_i = 0; \ + _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip; \ + ++_FP_FRAC_SRS_8_i) \ + X##_f[_FP_FRAC_SRS_8_i] \ + = ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip] \ + >> _FP_FRAC_SRS_8_down) \ + | (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1] \ + << _FP_FRAC_SRS_8_up)); \ + X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down; \ } \ - for (; _i < 8; ++_i) \ - X##_f[_i] = 0; \ + for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i) \ + X##_f[_FP_FRAC_SRS_8_i] = 0; \ /* don't fix the LSB until the very end when we're sure f[0] is \ stable */ \ - X##_f[0] |= (_s != 0); \ + X##_f[0] |= (_FP_FRAC_SRS_8_s != 0); \ } \ while (0) |