diff options
author | Joey Ye <joey.ye@intel.com> | 2008-08-28 19:20:03 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2008-08-28 12:20:03 -0700 |
commit | 31cb596a6b572773d25e7196ff19e126a52ca1e4 (patch) | |
tree | 66504beb1202aa49882720584f8da13e799b6c0e | |
parent | 95879c728b9a59ae67db022ad370eb66374090f3 (diff) | |
download | gcc-31cb596a6b572773d25e7196ff19e126a52ca1e4.zip gcc-31cb596a6b572773d25e7196ff19e126a52ca1e4.tar.gz gcc-31cb596a6b572773d25e7196ff19e126a52ca1e4.tar.bz2 |
extend.texi: Document AVX built-in functions.
2008-08-28 Joey Ye <joey.ye@intel.com>
* doc/extend.texi: Document AVX built-in functions.
* doc/invoke.texi: Document -mavx.
From-SVN: r139727
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 135 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 10 |
3 files changed, 148 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b326cc6..5e93e9d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2008-08-28 Joey Ye <joey.ye@intel.com> + + * doc/extend.texi: Document AVX built-in functions. + * doc/invoke.texi: Document -mavx. + 2008-08-28 H.J. Lu <hongjiu.lu@intel.com> Joey Ye <joey.ye@intel.com> Xuepeng Guo <xuepeng.guo@intel.com> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 1f8cbd3..760c128 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -8271,6 +8271,141 @@ depending on the size of @code{unsigned long}. Generates the @code{popcntq} machine instruction. @end table +The following built-in functions are available when @option{-mavx} is +used. All of them generate the machine instruction that is part of the +name. + +@smallexample +v4df __builtin_ia32_addpd256 (v4df,v4df) +v8sf __builtin_ia32_addps256 (v8sf,v8sf) +v4df __builtin_ia32_addsubpd256 (v4df,v4df) +v8sf __builtin_ia32_addsubps256 (v8sf,v8sf) +v4df __builtin_ia32_andnpd256 (v4df,v4df) +v8sf __builtin_ia32_andnps256 (v8sf,v8sf) +v4df __builtin_ia32_andpd256 (v4df,v4df) +v8sf __builtin_ia32_andps256 (v8sf,v8sf) +v4df __builtin_ia32_blendpd256 (v4df,v4df,int) +v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int) +v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df) +v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf) +v2df __builtin_ia32_cmppd (v2df,v2df,int) +v4df __builtin_ia32_cmppd256 (v4df,v4df,int) +v4sf __builtin_ia32_cmpps (v4sf,v4sf,int) +v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int) +v2df __builtin_ia32_cmpsd (v2df,v2df,int) +v4sf __builtin_ia32_cmpss (v4sf,v4sf,int) +v4df __builtin_ia32_cvtdq2pd256 (v4si) +v8sf __builtin_ia32_cvtdq2ps256 (v8si) +v4si __builtin_ia32_cvtpd2dq256 (v4df) +v4sf __builtin_ia32_cvtpd2ps256 (v4df) +v8si __builtin_ia32_cvtps2dq256 (v8sf) +v4df __builtin_ia32_cvtps2pd256 (v4sf) +v4si __builtin_ia32_cvttpd2dq256 (v4df) +v8si __builtin_ia32_cvttps2dq256 (v8sf) +v4df __builtin_ia32_divpd256 (v4df,v4df) +v8sf 
__builtin_ia32_divps256 (v8sf,v8sf) +v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int) +v4df __builtin_ia32_haddpd256 (v4df,v4df) +v8sf __builtin_ia32_haddps256 (v8sf,v8sf) +v4df __builtin_ia32_hsubpd256 (v4df,v4df) +v8sf __builtin_ia32_hsubps256 (v8sf,v8sf) +v32qi __builtin_ia32_lddqu256 (pcchar) +v32qi __builtin_ia32_loaddqu256 (pcchar) +v4df __builtin_ia32_loadupd256 (pcdouble) +v8sf __builtin_ia32_loadups256 (pcfloat) +v2df __builtin_ia32_maskloadpd (pcv2df,v2df) +v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df) +v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf) +v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf) +void __builtin_ia32_maskstorepd (pv2df,v2df,v2df) +void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df) +void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf) +void __builtin_ia32_maskstoreps256 (pv8sf,v8sf,v8sf) +v4df __builtin_ia32_maxpd256 (v4df,v4df) +v8sf __builtin_ia32_maxps256 (v8sf,v8sf) +v4df __builtin_ia32_minpd256 (v4df,v4df) +v8sf __builtin_ia32_minps256 (v8sf,v8sf) +v4df __builtin_ia32_movddup256 (v4df) +int __builtin_ia32_movmskpd256 (v4df) +int __builtin_ia32_movmskps256 (v8sf) +v8sf __builtin_ia32_movshdup256 (v8sf) +v8sf __builtin_ia32_movsldup256 (v8sf) +v4df __builtin_ia32_mulpd256 (v4df,v4df) +v8sf __builtin_ia32_mulps256 (v8sf,v8sf) +v4df __builtin_ia32_orpd256 (v4df,v4df) +v8sf __builtin_ia32_orps256 (v8sf,v8sf) +v2df __builtin_ia32_pd_pd256 (v4df) +v4df __builtin_ia32_pd256_pd (v2df) +v4sf __builtin_ia32_ps_ps256 (v8sf) +v8sf __builtin_ia32_ps256_ps (v4sf) +int __builtin_ia32_ptestc256 (v4di,v4di,ptest) +int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest) +int __builtin_ia32_ptestz256 (v4di,v4di,ptest) +v8sf __builtin_ia32_rcpps256 (v8sf) +v4df __builtin_ia32_roundpd256 (v4df,int) +v8sf __builtin_ia32_roundps256 (v8sf,int) +v8sf __builtin_ia32_rsqrtps_nr256 (v8sf) +v8sf __builtin_ia32_rsqrtps256 (v8sf) +v4df __builtin_ia32_shufpd256 (v4df,v4df,int) +v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int) +v4si __builtin_ia32_si_si256 (v8si) +v8si 
__builtin_ia32_si256_si (v4si) +v4df __builtin_ia32_sqrtpd256 (v4df) +v8sf __builtin_ia32_sqrtps_nr256 (v8sf) +v8sf __builtin_ia32_sqrtps256 (v8sf) +void __builtin_ia32_storedqu256 (pchar,v32qi) +void __builtin_ia32_storeupd256 (pdouble,v4df) +void __builtin_ia32_storeups256 (pfloat,v8sf) +v4df __builtin_ia32_subpd256 (v4df,v4df) +v8sf __builtin_ia32_subps256 (v8sf,v8sf) +v4df __builtin_ia32_unpckhpd256 (v4df,v4df) +v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf) +v4df __builtin_ia32_unpcklpd256 (v4df,v4df) +v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf) +v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df) +v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf) +v4df __builtin_ia32_vbroadcastsd256 (pcdouble) +v4sf __builtin_ia32_vbroadcastss (pcfloat) +v8sf __builtin_ia32_vbroadcastss256 (pcfloat) +v2df __builtin_ia32_vextractf128_pd256 (v4df,int) +v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int) +v4si __builtin_ia32_vextractf128_si256 (v8si,int) +v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int) +v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int) +v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int) +v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int) +v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int) +v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int) +v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int) +v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int) +v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int) +v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int) +v2df __builtin_ia32_vpermilpd (v2df,int) +v4df __builtin_ia32_vpermilpd256 (v4df,int) +v4sf __builtin_ia32_vpermilps (v4sf,int) +v8sf __builtin_ia32_vpermilps256 (v8sf,int) +v2df __builtin_ia32_vpermilvarpd (v2df,v2di) +v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di) +v4sf __builtin_ia32_vpermilvarps (v4sf,v4si) +v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si) +int __builtin_ia32_vtestcpd (v2df,v2df,ptest) +int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestcps 
(v4sf,v4sf,ptest) +int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest) +int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest) +int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest) +int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest) +int __builtin_ia32_vtestzpd (v2df,v2df,ptest) +int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestzps (v4sf,v4sf,ptest) +int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest) +void __builtin_ia32_vzeroall (void) +void __builtin_ia32_vzeroupper (void) +v4df __builtin_ia32_xorpd256 (v4df,v4df) +v8sf __builtin_ia32_xorps256 (v8sf,v8sf) +@end smallexample + The following built-in functions are available when @option{-maes} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0c4e6b4..e1e6969 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -563,7 +563,7 @@ Objective-C and Objective-C++ Dialects}. -mpreferred-stack-boundary=@var{num} -mincoming-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol --mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol +-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -maes -mpclmul @gol -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol -mthreads -mno-align-stringops -minline-all-stringops @gol @@ -10865,6 +10865,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-sse4.2 @itemx -msse4 @itemx -mno-sse4 +@itemx -mavx +@itemx -mno-avx @itemx -maes @itemx -mno-aes @itemx -mpclmul @@ -10886,7 +10888,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or +SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or 3DNow!@: extended instruction sets. 
These extensions are also available as built-in functions: see @ref{X86 Built-in Functions}, for details of the functions enabled and @@ -10895,6 +10897,10 @@ disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}. +GCC suppresses SSEx instructions when @option{-mavx} is used. Instead, it +generates new AVX instructions or AVX equivalents for all SSEx instructions +when needed. + These options will enable GCC to use these extended instructions in generated code, even without @option{-mfpmath=sse}. Applications which perform runtime CPU detection must compile separate files for each |