diff options
author | Ilya Tocar <ilya.tocar@intel.com> | 2011-08-30 14:02:53 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2011-08-30 07:02:53 -0700 |
commit | 2ddd46d69b09a88fb82832285b69090fa08bddc2 (patch) | |
tree | 175e06db22b216944eb8ef409716322c278cd349 /gcc | |
parent | c199ccf75867fa7287570ff1ec19ef76cc9d5ea6 (diff) | |
download | gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.zip gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.gz gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.bz2 |
Add FMA intrinsics and testcases.
gcc/
2011-08-30 Ilya Tocar <ilya.tocar@intel.com>
* config/i386/fmaintrin.h: New.
* config.gcc: Add fmaintrin.h.
* config/i386/i386.c
(enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New.
<IX86_BUILTIN_VFMADDSD3>: Likewise.
* config/i386/sse.md (fmai_vmfmadd_<mode>): New.
(*fmai_fmadd_<mode>): Likewise.
(*fmai_fmsub_<mode>): Likewise.
(*fmai_fnmadd_<mode>): Likewise.
(*fmai_fnmsub_<mode>): Likewise.
* config/i386/immintrin.h: Add fmaintrin.h.
gcc/testsuite/
2011-08-30 Ilya Tocar <ilya.tocar@intel.com>
* gcc.target/i386/fma-check.h: New.
* gcc.target/i386/fma-256-fmaddXX.c: New testcase.
* gcc.target/i386/fma-256-fmaddsubXX.c: Likewise.
* gcc.target/i386/fma-256-fmsubXX.c: Likewise.
* gcc.target/i386/fma-256-fmsubaddXX.c: Likewise.
* gcc.target/i386/fma-256-fnmaddXX.c: Likewise.
* gcc.target/i386/fma-256-fnmsubXX.c: Likewise.
* gcc.target/i386/fma-fmaddXX.c: Likewise.
* gcc.target/i386/fma-fmaddsubXX.c: Likewise.
* gcc.target/i386/fma-fmsubXX.c: Likewise.
* gcc.target/i386/fma-fmsubaddXX.c: Likewise.
* gcc.target/i386/fma-fnmaddXX.c: Likewise.
* gcc.target/i386/fma-fnmsubXX.c: Likewise.
* gcc.target/i386/fma-compile.c: Likewise.
* gcc.target/i386/i386.exp (check_effective_target_fma): New.
* gcc.target/i386/sse-12.c: Add -mfma.
* gcc.target/i386/sse-13.c: Likewise.
* gcc.target/i386/sse-14.c: Likewise.
* gcc.target/i386/sse-22.c: Likewise.
* gcc.target/i386/sse-23.c: Likewise.
* g++.dg/other/i386-2.C: Likewise.
* g++.dg/other/i386-3.C: Likewise.
From-SVN: r178311
Diffstat (limited to 'gcc')
29 files changed, 1614 insertions, 21 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b89303d..03db902 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2011-08-30 Ilya Tocar <ilya.tocar@intel.com> + + * config/i386/fmaintrin.h: New. + * config.gcc: Add fmaintrin.h. + * config/i386/i386.c + (enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New. + <IX86_BUILTIN_VFMADDSD3>: Likewise. + * config/i386/sse.md (fmai_vmfmadd_<mode>): New. + (*fmai_fmadd_<mode>): Likewise. + (*fmai_fmsub_<mode>): Likewise. + (*fmai_fnmadd_<mode>): Likewise. + (*fmai_fnmsub_<mode>): Likewise. + * config/i386/immintrin.h: Add fmaintrin.h. + 2011-08-30 Bernd Schmidt <bernds@codesourcery.com> * genautomata.c (NO_COMB_OPTION): New macro. diff --git a/gcc/config.gcc b/gcc/config.gcc index 67aae86..81b542c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -353,7 +353,7 @@ i[34567]86-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h - avx2intrin.h" + avx2intrin.h fmaintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -366,7 +366,7 @@ x86_64-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h - avx2intrin.h" + avx2intrin.h fmaintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h new file mode 100644 index 0000000..9ec9d17 --- /dev/null +++ b/gcc/config/i386/fmaintrin.h @@ -0,0 +1,297 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _FMAINTRIN_H_INCLUDED +#define _FMAINTRIN_H_INCLUDED + +#ifndef __FMA__ +# error "FMA instruction set not enabled" +#else + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, + (__v2df)__C); +} + +extern __inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, + (__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_sd 
(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, + (__v2df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, + -(__v2df)__C); +} + +extern __inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, + -(__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, + -(__v2df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return 
(__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, + (__v2df)__C); +} + +extern __inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, + (__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, + (__v2df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); +} + +extern __inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, 
+ -(__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, + (__v2df)__C); +} + +extern __inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, + (__v4df)__B, + (__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, + (__v8sf)__B, + (__v8sf)__C); +} + +extern __inline __m128d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, + -(__v2df)__C); +} + +extern 
__inline __m256d +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, + (__v4df)__B, + -(__v4df)__C); +} + +extern __inline __m128 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); +} + +extern __inline __m256 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, + (__v8sf)__B, + -(__v8sf)__C); +} + +#endif + +#endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 32495ee..504f013 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24055,7 +24055,7 @@ enum ix86_builtins IX86_BUILTIN_VEC_PERM_V4DF, IX86_BUILTIN_VEC_PERM_V8SF, - /* FMA4 and XOP instructions. */ + /* FMA4 instructions. */ IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDPS, @@ -24067,6 +24067,11 @@ enum ix86_builtins IX86_BUILTIN_VFMADDSUBPS256, IX86_BUILTIN_VFMADDSUBPD256, + /* FMA3 instructions. */ + IX86_BUILTIN_VFMADDSS3, + IX86_BUILTIN_VFMADDSD3, + + /* XOP instructions. 
*/ IX86_BUILTIN_VPCMOV, IX86_BUILTIN_VPCMOV_V2DI, IX86_BUILTIN_VPCMOV_V4SI, @@ -25450,6 +25455,13 @@ static const struct builtin_description bdesc_multi_arg[] = "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, + "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, + UNKNOWN, (int)MULTI_ARG_3_SF }, + { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, + "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, + UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF }, diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index d2e715f..102814e 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -72,6 +72,10 @@ #include <bmi2intrin.h> #endif +#ifdef __FMA__ +#include <fmaintrin.h> +#endif + #ifdef __RDRND__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index fa22e9a..8ce3e3a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1719,6 +1719,89 @@ operands[4] = CONST0_RTX (<MODE>mode); }) +(define_expand "fmai_vmfmadd_<mode>" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand") + (match_operand:VF_128 2 "nonimmediate_operand") + (match_operand:VF_128 3 "nonimmediate_operand")) + (match_dup 0) + (const_int 1)))] + "TARGET_FMA") + +(define_insn "*fmai_fmadd_<mode>" + [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")) + (match_dup 0) + (const_int 1)))] + "TARGET_FMA" + "@ + 
vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fmai_fmsub_<mode>" + [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (neg:VF_128 + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))) + (match_dup 0) + (const_int 1)))] + "TARGET_FMA" + "@ + vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fmai_fnmadd_<mode>" + [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")) + (match_dup 0) + (const_int 1)))] + "TARGET_FMA" + "@ + vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fmai_fnmsub_<mode>" + [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (neg:VF_128 + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))) + (match_dup 0) + (const_int 1)))] + "TARGET_FMA" + "@ + vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, 
%2, %3} + vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "*fma4i_vmfmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 86b8019..55d74a9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,28 @@ +2011-08-30 Ilya Tocar <ilya.tocar@intel.com> + + * gcc.target/i386/fma-check.h: New. + * gcc.target/i386/fma-256-fmaddXX.c: New testcase. + * gcc.target/i386/fma-256-fmaddsubXX.c: Likewise. + * gcc.target/i386/fma-256-fmsubXX.c: Likewise. + * gcc.target/i386/fma-256-fmsubaddXX.c: Likewise. + * gcc.target/i386/fma-256-fnmaddXX.c: Likewise. + * gcc.target/i386/fma-256-fnmsubXX.c: Likewise. + * gcc.target/i386/fma-fmaddXX.c: Likewise. + * gcc.target/i386/fma-fmaddsubXX.c: Likewise. + * gcc.target/i386/fma-fmsubXX.c: Likewise. + * gcc.target/i386/fma-fmsubaddXX.c: Likewise. + * gcc.target/i386/fma-fnmaddXX.c: Likewise. + * gcc.target/i386/fma-fnmsubXX.c: Likewise. + * gcc.target/i386/fma-compile.c: Likewise. + * gcc.target/i386/i386.exp (check_effective_target_fma): New. + * gcc.target/i386/sse-12.c: Add -mfma. + * gcc.target/i386/sse-13.c: Likewise. + * gcc.target/i386/sse-14.c: Likewise. + * gcc.target/i386/sse-22.c: Likewise. + * gcc.target/i386/sse-23.c: Likewise. + * g++.dg/other/i386-2.C: Likewise. + * g++.dg/other/i386-3.C: Likewise. 
+ 2011-08-30 Kirill Yukhin <kirill.yukhin@intel.com> PR testsuite/50185 diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 8c9c911..e8237a4 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,9 +1,10 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, - popcntintrin.h and mm_malloc.h.h are usable with -O -pedantic-errors. */ + popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with + -O -pedantic-errors. 
*/ #include <x86intrin.h> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index d8c6f8d..9abbd32 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,9 +1,10 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, - popcntintrin.h and mm_malloc.h are usable with + popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with -O -fkeep-inline-functions. 
*/ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c new file mode 100644 index 0000000..7e73402 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fmadd_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + c.a[i]; + } + if (check_union256d (e, d)) + abort (); +} + +void +check_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fmadd_ps (__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = a.a[i] * b.a[i] + c.a[i]; + } + if (check_union256 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fmadd_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fmadd_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c new file mode 100644 index 0000000..4b61ad5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fmaddsub_ps 
(__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]); + } + if (check_union256 (e, d)) + abort (); +} + +void +check_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fmaddsub_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]); + } + if (check_union256d (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fmaddsub_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fmaddsub_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c new file mode 100644 index 0000000..d92aec0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + + +void +check_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fmsub_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] - c.a[i]; + } + if (check_union256d (e, d)) + abort (); +} + +void +check_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fmsub_ps (__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = a.a[i] * b.a[i] - c.a[i]; + } + if (check_union256 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j 
< 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fmsub_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fmsub_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c new file mode 100644 index 0000000..84a41c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fmsubadd_ps (__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]); + } + if (check_union256 (e, d)) + abort (); +} + +void +check_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fmsubadd_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? 
-c.a[i] : c.a[i]); + } + if (check_union256d (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fmsubadd_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fmsubadd_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c new file mode 100644 index 0000000..c0dfa69 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fnmadd_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = -a.a[i] * b.a[i] + c.a[i]; + } + if (check_union256d (e, d)) + abort (); +} + +void +check_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fnmadd_ps (__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = -a.a[i] * b.a[i] + c.a[i]; + } + if (check_union256 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fnmadd_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fnmadd_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c new file mode 100644 index 0000000..ac4705e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c @@ 
-0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + + +void +check_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) +{ + union256d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[4]; + int i; + e.x = _mm256_fnmsub_pd (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = -a.a[i] * b.a[i] - c.a[i]; + } + if (check_union256d (e, d)) + abort (); +} + +void +check_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) +{ + union256 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[8]; + int i; + e.x = _mm256_fnmsub_ps (__A, __B, __C); + for (i = 0; i < 8; i++) + { + d[i] = -a.a[i] * b.a[i] - c.a[i]; + } + if (check_union256 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union256 c[3]; + union256d d[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 8; j++) + c[i].a[j] = i * j + 3.5; + for (j = 0; j < 4; j++) + d[i].a[j] = i * j + 3.5; + } + check_mm256_fnmsub_pd (d[0].x, d[1].x, d[2].x); + check_mm256_fnmsub_ps (c[0].x, c[1].x, c[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-check.h b/gcc/testsuite/gcc.target/i386/fma-check.h new file mode 100644 index 0000000..696c4a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-check.h @@ -0,0 +1,25 @@ +#include <stdlib.h> + +#include "cpuid.h" + +static void fma_test (void); + +static void __attribute__ ((noinline)) do_test (void) +{ + fma_test (); +} + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return 0; + + /* Run FMA test only if host has FMA support. 
*/ + if (ecx & bit_FMA) + do_test (); + + exit (0); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-compile.c b/gcc/testsuite/gcc.target/i386/fma-compile.c new file mode 100644 index 0000000..6d5daa5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-compile.c @@ -0,0 +1,221 @@ +/* Test that the compiler properly generates floating point multiply + and add instructions on FMA systems. */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -mfma" } */ + +#include <x86intrin.h> + +__m128d +check_mm_fmadd_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmadd_pd (a, b, c); +} + +__m256d +check_mm256_fmadd_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fmadd_pd (a, b, c); +} + +__m128 +check_mm_fmadd_ps (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmadd_ps (a, b, c); +} + +__m256 +check_mm256_fmadd_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fmadd_ps (a, b, c); +} + +__m128d +check_mm_fmadd_sd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmadd_sd (a, b, c); +} + +__m128 +check_mm_fmadd_ss (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmadd_ss (a, b, c); +} + +__m128d +check_mm_fmsub_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmsub_pd (a, b, c); +} + +__m256d +check_mm256_fmsub_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fmsub_pd (a, b, c); +} + +__m128 +check_mm_fmsub_ps (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmsub_ps (a, b, c); +} + +__m256 +check_mm256_fmsub_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fmsub_ps (a, b, c); +} + +__m128d +check_mm_fmsub_sd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmsub_sd (a, b, c); +} + +__m128 +check_mm_fmsub_ss (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmsub_ss (a, b, c); +} + +__m128d +check_mm_fnmadd_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fnmadd_pd (a, b, c); +} + +__m256d +check_mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fnmadd_pd (a, b, c); +} + +__m128 +check_mm_fnmadd_ps (__m128 a, __m128 b, __m128 c) +{ +
return _mm_fnmadd_ps (a, b, c); +} + +__m256 +check_mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fnmadd_ps (a, b, c); +} + +__m128d +check_mm_fnmadd_sd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fnmadd_sd (a, b, c); +} + +__m128 +check_mm_fnmadd_ss (__m128 a, __m128 b, __m128 c) +{ + return _mm_fnmadd_ss (a, b, c); +} + +__m128d +check_mm_fnmsub_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fnmsub_pd (a, b, c); +} + +__m256d +check_mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fnmsub_pd (a, b, c); +} + +__m128 +check_mm_fnmsub_ps (__m128 a, __m128 b, __m128 c) +{ + return _mm_fnmsub_ps (a, b, c); +} + +__m256 +check_mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fnmsub_ps (a, b, c); +} + +__m128d +check_mm_fnmsub_sd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fnmsub_sd (a, b, c); +} + +__m128 +check_mm_fnmsub_ss (__m128 a, __m128 b, __m128 c) +{ + return _mm_fnmsub_ss (a, b, c); +} + +__m128d +check_mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmaddsub_pd (a, b, c); +} + +__m256d +check_mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fmaddsub_pd (a, b, c); +} + +__m128 +check_mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmaddsub_ps (a, b, c); +} + +__m256 +check_mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fmaddsub_ps (a, b, c); +} + +__m128d +check_mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c) +{ + return _mm_fmsubadd_pd (a, b, c); +} + +__m256d +check_mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c) +{ + return _mm256_fmsubadd_pd (a, b, c); +} + +__m128 +check_mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c) +{ + return _mm_fmsubadd_ps (a, b, c); +} + +__m256 +check_mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c) +{ + return _mm256_fmsubadd_ps (a, b, c); +} + + +/* { dg-final { scan-assembler-times "vfmadd[^s]..ps" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsub[^s]..ps" 2 } } */ +/* { 
dg-final { scan-assembler-times "vfnmadd...ps" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps" 2 } } */ +/* { dg-final { scan-assembler-times "vfmaddsub...ps" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsubadd...ps" 2 } } */ +/* { dg-final { scan-assembler-times "vfmadd[^s]..pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsub[^s]..pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfmaddsub...pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfmsubadd...pd" 2 } } */ +/* { dg-final { scan-assembler-times "vfmadd[^s]..ss" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub[^s]..ss" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ss" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ss" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd[^s]..sd" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub[^s]..sd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...sd" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...sd" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c new file mode 100644 index 0000000..43ef9e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmadd_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = a.a[i] * b.a[i] + c.a[i]; + } + + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + 
e.x = _mm_fmadd_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + c.a[i]; + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmadd_sd (__A, __B, __C); + for (i = 1; i < 2; i++) + { + d[i] = a.a[i]; + } + d[0] = a.a[0] * b.a[0] + c.a[0]; + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fmadd_ss (__A, __B, __C); + for (i = 1; i < 4; i++) + { + d[i] = a.a[i]; + } + d[0] = a.a[0] * b.a[0] + c.a[0]; + if (check_union128 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fmadd_pd (b[0].x, b[1].x, b[2].x); + check_mm_fmadd_sd (b[0].x, b[1].x, b[2].x); + check_mm_fmadd_ps (a[0].x, a[1].x, a[2].x); + check_mm_fmadd_ss (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c new file mode 100644 index 0000000..89c8163 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fmaddsub_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? 
c.a[i] : -c.a[i]); + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmaddsub_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]); + } + if (check_union128d (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fmaddsub_pd (b[0].x, b[1].x, b[2].x); + check_mm_fmaddsub_ps (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c new file mode 100644 index 0000000..3d92d4b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmsub_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = a.a[i] * b.a[i] - c.a[i]; + } + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fmsub_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] - c.a[i]; + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmsub_sd (__A, __B, __C); + for (i = 1; i < 2; i++) + { + d[i] = a.a[i]; + } + d[0] 
= a.a[0] * b.a[0] - c.a[0]; + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fmsub_ss (__A, __B, __C); + for (i = 1; i < 4; i++) + { + d[i] = a.a[i]; + } + d[0] = a.a[0] * b.a[0] - c.a[0]; + if (check_union128 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fmsub_pd (b[0].x, b[1].x, b[2].x); + check_mm_fmsub_sd (b[0].x, b[1].x, b[2].x); + check_mm_fmsub_ps (a[0].x, a[1].x, a[2].x); + check_mm_fmsub_ss (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c new file mode 100644 index 0000000..b03f875 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fmsubadd_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]); + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fmsubadd_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? 
-c.a[i] : c.a[i]); + } + if (check_union128d (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fmsubadd_pd (b[0].x, b[1].x, b[2].x); + check_mm_fmsubadd_ps (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c new file mode 100644 index 0000000..f23a6c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fnmadd_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = -a.a[i] * b.a[i] + c.a[i]; + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fnmadd_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = -a.a[i] * b.a[i] + c.a[i]; + } + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fnmadd_sd (__A, __B, __C); + for (i = 1; i < 2; i++) + { + d[i] = a.a[i]; + } + d[0] = -a.a[0] * b.a[0] + c.a[0]; + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fnmadd_ss (__A, __B, __C); + for (i = 1; i < 4; i++) + { + d[i] = a.a[i]; + } + d[0] = 
-a.a[0] * b.a[0] + c.a[0]; + if (check_union128 (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fnmadd_pd (b[0].x, b[1].x, b[2].x); + check_mm_fnmadd_sd (b[0].x, b[1].x, b[2].x); + check_mm_fnmadd_ps (a[0].x, a[1].x, a[2].x); + check_mm_fnmadd_ss (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c new file mode 100644 index 0000000..d17c7f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma" } */ + +#include "fma-check.h" + +#include <x86intrin.h> +#include "m256-check.h" + +void +check_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + int i; + e.x = _mm_fnmsub_sd (__A, __B, __C); + for (i = 1; i < 2; i++) + { + d[i] = a.a[i]; + } + d[0] = -a.a[0] * b.a[0] - c.a[0]; + if (check_union128d (e, d)) + abort (); +} + +void +check_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fnmsub_ss (__A, __B, __C); + for (i = 1; i < 4; i++) + { + d[i] = a.a[i]; + } + d[0] = -a.a[0] * b.a[0] - c.a[0]; + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) +{ + union128 a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + float d[4]; + int i; + e.x = _mm_fnmsub_ps (__A, __B, __C); + for (i = 0; i < 4; i++) + { + d[i] = -a.a[i] * b.a[i] - c.a[i]; + } + if (check_union128 (e, d)) + abort (); +} + +void +check_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) +{ + union128d a, b, c, e; + a.x = __A; + b.x = __B; + c.x = __C; + double d[2]; + 
int i; + e.x = _mm_fnmsub_pd (__A, __B, __C); + for (i = 0; i < 2; i++) + { + d[i] = -a.a[i] * b.a[i] - c.a[i]; + } + if (check_union128d (e, d)) + abort (); +} + +static void +fma_test (void) +{ + union128 a[3]; + union128d b[3]; + int i, j; + for (i = 0; i < 3; i++) + { + for (j = 0; j < 4; j++) + a[i].a[j] = i * j + 3.5; + for (j = 0; j < 2; j++) + b[i].a[j] = i * j + 3.5; + } + check_mm_fnmsub_pd (b[0].x, b[1].x, b[2].x); + check_mm_fnmsub_sd (b[0].x, b[1].x, b[2].x); + check_mm_fnmsub_ps (a[0].x, a[1].x, a[2].x); + check_mm_fnmsub_ss (a[0].x, a[1].x, a[2].x); +} diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index 6517d45..75bea9b 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -172,6 +172,20 @@ proc check_effective_target_fma4 { } { } "-O2 -mfma4" ] } +# Return 1 if fma instructions can be compiled. +proc check_effective_target_fma { } { + return [check_no_compiler_messages fma object { + typedef float __m128 __attribute__ ((__vector_size__ (16))); + typedef float __v4sf __attribute__ ((__vector_size__ (16))); + __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) + { + return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, + (__v4sf)__B, + (__v4sf)__C); + } + } "-O2 -mfma" ] +} + # Return 1 if xop instructions can be compiled. proc check_effective_target_xop { } { return [check_no_compiler_messages xop object { diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 9f3713c..66a36c6 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. 
*/ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 134905d..4bc0a2e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,13 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ #include <mm_malloc.h> /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, - tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that - reference the proper builtin functions. + tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h + that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. 
*/ diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index c1f10f1..6451166 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,12 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */ #include <mm_malloc.h> /* Test that the intrinsics compile without optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, - fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, - lwpintrin.h and mm_malloc.h that reference the proper builtin functions. + fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, + lwpintrin.h, fmaintrin.h and mm_malloc.h that reference the proper + builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 89ea7b3..9ccb92d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -7,8 +7,8 @@ /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, - tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that - reference the proper builtin functions. + tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h + that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. 
*/ @@ -255,9 +255,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) #endif #include <popcntintrin.h> -/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */ +/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt") +#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma") #endif #include <x86intrin.h> /* xopintrin.h */ diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index ef2471c..462f8c9 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -6,8 +6,8 @@ /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, - tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that - reference the proper builtin functions. + tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h + that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -180,7 +180,7 @@ #define __builtin_ia32_gatherdiv4si(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si(X, Y, Z, K, 1) #define __builtin_ia32_gatherdiv4si256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si256(X, Y, Z, K, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma") #include <wmmintrin.h> #include <smmintrin.h> #include <mm3dnow.h> |