PR target/44218, improve -mrecip on powerpc

From-SVN: r160199
author: Michael Meissner <meissner@linux.vnet.ibm.com> 2010-06-03 00:06:12 +0000
committer: Michael Meissner <meissner@gcc.gnu.org> 2010-06-03 00:06:12 +0000
commit: 92902797041a42ac500f7dc9639df8a680e0b691 (patch)
tree: d55e7fa0ae623e1c748075d3f81edeb35fb123fb /gcc/doc
parent: 6c07d08b90b124d8d3be8015726caf799e2e2a13 (diff)
download: gcc-92902797041a42ac500f7dc9639df8a680e0b691.zip
gcc-92902797041a42ac500f7dc9639df8a680e0b691.tar.gz
gcc-92902797041a42ac500f7dc9639df8a680e0b691.tar.bz2
2 files changed, 65 insertions, 13 deletions
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8e9a706..5f0d762 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10994,6 +10994,10 @@ vector unsigned char vec_vrlb (vector unsigned char,
 
 vector float vec_round (vector float);
 
+vector float vec_recip (vector float, vector float);
+
+vector float vec_rsqrt (vector float);
+
 vector float vec_rsqrte (vector float);
 
 vector float vec_sel (vector float, vector float, vector bool int);
@@ -11922,8 +11926,10 @@ vector double vec_or (vector bool long, vector double);
 vector double vec_perm (vector double,
                         vector double,
                         vector unsigned char);
-vector float vec_rint (vector float);
 vector double vec_rint (vector double);
+vector double vec_recip (vector double, vector double);
+vector double vec_rsqrt (vector double);
+vector double vec_rsqrte (vector double);
 vector double vec_sel (vector double, vector double, vector bool long);
 vector double vec_sel (vector double, vector double, vector unsigned long);
 vector double vec_sub (vector double, vector double);
@@ -11964,10 +11970,20 @@ GCC provides a few other builtins on Powerpc to access certain instructions:
 float __builtin_recipdivf (float, float);
 float __builtin_rsqrtf (float);
 double __builtin_recipdiv (double, double);
+double __builtin_rsqrt (double);
 long __builtin_bpermd (long, long);
 int __builtin_bswap16 (int);
 @end smallexample
 
+The @code{vec_rsqrt}, @code{__builtin_rsqrt}, and
+@code{__builtin_rsqrtf} functions generate multiple instructions to
+implement the reciprocal sqrt functionality using reciprocal sqrt
+estimate instructions.
+
+The @code{__builtin_recipdiv} and @code{__builtin_recipdivf}
+functions generate multiple instructions to implement division using
+the reciprocal estimate instructions.
+
 @node RX Built-in Functions
 @subsection RX Built-in Functions
 GCC supports some of the RX instructions which cannot be expressed in
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2a4ea47..d8c0c22 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -783,7 +783,8 @@ See RS/6000 and PowerPC Options.
 -mfloat-gprs=yes  -mfloat-gprs=no -mfloat-gprs=single -mfloat-gprs=double @gol
 -mprototype  -mno-prototype @gol
 -msim  -mmvme  -mads  -myellowknife  -memb  -msdata @gol
--msdata=@var{opt}  -mvxworks  -G @var{num}  -pthread}
+-msdata=@var{opt}  -mvxworks  -G @var{num}  -pthread @gol
+-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision -mno-recip-precision}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -14975,17 +14976,6 @@ values for @var{cpu_type} are used for @option{-mtune} as for
 architecture, registers, and mnemonics set by @option{-mcpu}, but the
 scheduling parameters set by @option{-mtune}.
 
-@item -mswdiv
-@itemx -mno-swdiv
-@opindex mswdiv
-@opindex mno-swdiv
-Generate code to compute division as reciprocal estimate and iterative
-refinement, creating opportunities for increased throughput.  This
-feature requires: optional PowerPC Graphics instruction set for single
-precision and FRE instruction for double precision, assuming divides
-cannot generate user-visible traps, and the domain values not include
-Infinities, denormals or zero denominator.
-
 @item -maltivec
 @itemx -mno-altivec
 @opindex maltivec
@@ -15641,6 +15631,52 @@ sequence.
 Adds support for multithreading with the @dfn{pthreads} library.
 This option sets flags for both the preprocessor and linker.
 
+@item -mrecip
+@itemx -mno-recip
+@opindex mrecip
+This option will enable GCC to use the reciprocal estimate and
+reciprocal square root estimate instructions with additional
+Newton-Raphson steps to increase precision instead of doing a divide or
+square root and divide for floating point arguments.  You should use
+the @option{-ffast-math} option when using @option{-mrecip} (or at
+least @option{-funsafe-math-optimizations},
+@option{-finite-math-only}, @option{-freciprocal-math} and
+@option{-fno-trapping-math}).  Note that while the throughput of the
+sequence is generally higher than the throughput of the non-reciprocal
+instruction, the precision of the sequence can be decreased by up to 2
+ulp (i.e.@: the inverse of 1.0 equals 0.99999994) for reciprocal square
+roots.
+
+@item -mrecip=@var{opt}
+@opindex mrecip=opt
+This option controls which reciprocal estimate instructions
+may be used.  @var{opt} is a comma-separated list of options, which may
+be preceded by a @code{!} to invert the option:
+@code{all}: enable all estimate instructions;
+@code{default}: enable the default instructions, equivalent to @option{-mrecip};
+@code{none}: disable all estimate instructions, equivalent to @option{-mno-recip};
+@code{div}: enable the reciprocal approximation instructions for both single and double precision;
+@code{divf}: enable the single precision reciprocal approximation instructions;
+@code{divd}: enable the double precision reciprocal approximation instructions;
+@code{rsqrt}: enable the reciprocal square root approximation instructions for both single and double precision;
+@code{rsqrtf}: enable the single precision reciprocal square root approximation instructions;
+@code{rsqrtd}: enable the double precision reciprocal square root approximation instructions.
+
+So for example, @option{-mrecip=all,!rsqrtd} would enable
+all of the reciprocal estimate instructions, except for the
+@code{FRSQRTE}, @code{XSRSQRTEDP}, and @code{XVRSQRTEDP} instructions
+which handle the double precision reciprocal square root calculations.
+
+@item -mrecip-precision
+@itemx -mno-recip-precision
+@opindex mrecip-precision
+Assume (do not assume) that the reciprocal estimate instructions
+provide higher precision estimates than is mandated by the PowerPC
+ABI.  Selecting @option{-mcpu=power6} or @option{-mcpu=power7}
+automatically selects @option{-mrecip-precision}.  The double
+precision square root estimate instructions are not generated by
+default on low precision machines, since they do not provide an
+estimate that converges after three steps.
 @end table
 
 @node RX Options
author	Michael Meissner <meissner@linux.vnet.ibm.com>	2010-06-03 00:06:12 +0000
committer	Michael Meissner <meissner@gcc.gnu.org>	2010-06-03 00:06:12 +0000
commit	92902797041a42ac500f7dc9639df8a680e0b691 (patch)
tree	d55e7fa0ae623e1c748075d3f81edeb35fb123fb /gcc/doc
parent	6c07d08b90b124d8d3be8015726caf799e2e2a13 (diff)
download	gcc-92902797041a42ac500f7dc9639df8a680e0b691.zip gcc-92902797041a42ac500f7dc9639df8a680e0b691.tar.gz gcc-92902797041a42ac500f7dc9639df8a680e0b691.tar.bz2