diff options
author | Hongyu Wang <hongyu.wang@intel.com> | 2021-09-28 15:30:14 +0800 |
---|---|---|
committer | Haochen Jiang <haochen.jiang@intel.com> | 2022-10-21 10:38:19 +0800 |
commit | 825d0041380378d978dfed6ea313e2ff9d2fce4c (patch) | |
tree | 722dabbb65a54d9642f43d4011d5ab27ca4294ec /gcc/config | |
parent | 47a6ae56584f5e3caaac74e158b77bf1c4fdd774 (diff) | |
download | gcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.zip gcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.tar.gz gcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.tar.bz2 |
Support Intel AVX-IFMA
gcc/
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET,
OPTION_MASK_ISA2_AVX2_UNSET): New macro.
(ix86_handle_option): Handle -mavxifma.
* common/config/i386/i386-cpuinfo.h (processor_types): Add
FEATURE_AVXIFMA.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxifma.
* common/config/i386/cpuinfo.h (get_available_features):
Detect avxifma.
* config.gcc: Add avxifmaintrin.h
* config/i386/avx512ifmavlintrin.h: (_mm_madd52lo_epu64): Change
to macro.
(_mm_madd52hi_epu64): Likewise.
(_mm256_madd52lo_epu64): Likewise.
(_mm256_madd52hi_epu64): Likewise.
* config/i386/avxifmaintrin.h: New header.
* config/i386/cpuid.h (bit_AVXIFMA): New.
* config/i386/i386-builtin.def: Add new builtins, and correct
pattern names for AVX512IFMA.
* config/i386/i386-builtins.cc (def_builtin): Handle AVX-IFMA
builtins like AVX-VNNI.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXIFMA__.
* config/i386/i386-expand.cc (ix86_check_builtin_isa_match):
Relax ISA masks for AVXIFMA.
* config/i386/i386-isa.def: Add AVXIFMA.
* config/i386/i386-options.cc (isa2_opts): Add -mavxifma.
(ix86_valid_target_attribute_inner_p): Handle avxifma.
* config/i386/i386.md (isa): Add attr avxifma and avxifmavl.
* config/i386/i386.opt: Add option -mavxifma.
* config/i386/immintrin.h: Inculde avxifmaintrin.h.
* config/i386/sse.md (avx_vpmadd52<vpmadd52type>_<mode>):
Remove.
(vpamdd52<vpmadd52type><mode><sd_maskz_name>): Remove.
(vpamdd52huq<mode>_maskz): Rename to ...
(vpmadd52huq<mode>_maskz): ... this.
(vpamdd52luq<mode>_maskz): Rename to ...
(vpmadd52luq<mode>_maskz): ... this.
(vpmadd52<vpmadd52type><mode>): New define_insn.
(vpmadd52<vpmadd52type>v8di): Likewise.
(vpmadd52<vpmadd52type><mode>_maskz_1): Likewise.
(vpamdd52<vpmadd52type><mode>_mask): Rename to ...
(vpmadd52<vpmadd52type><mode>_mask): ... this.
* doc/invoke.texi: Document -mavxifma.
* doc/extend.texi: Document avxifma.
* doc/sourcebuild.texi: Document target avxifma.
gcc/testsuite/
* gcc.target/i386/avx-check.h: Add avxifma check.
* gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Remane..
* gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this.
* gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto.
* gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto.
* gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test.
* gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto.
* gcc.target/i386/avx-ifma-1.c: Ditto.
* gcc.target/i386/avx-ifma-2.c: Ditto.
* gcc.target/i386/avx-ifma-3.c: Ditto.
* gcc.target/i386/avx-ifma-4.c: Ditto.
* gcc.target/i386/avx-ifma-5.c: Ditto.
* gcc.target/i386/avx-ifma-6.c: Ditto.
* gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto.
* gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto.
* gcc.target/i386/sse-12.c: Add -mavxifma.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxifma): New.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/avx512ifmavlintrin.h | 59 | ||||
-rw-r--r-- | gcc/config/i386/avxifmaintrin.h | 78 | ||||
-rw-r--r-- | gcc/config/i386/cpuid.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 28 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtins.cc | 8 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.cc | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386-isa.def | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386-options.cc | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 5 | ||||
-rw-r--r-- | gcc/config/i386/immintrin.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 56 |
13 files changed, 195 insertions, 68 deletions
diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h index a7a50d8..506dce8 100644 --- a/gcc/config/i386/avx512ifmavlintrin.h +++ b/gcc/config/i386/avx512ifmavlintrin.h @@ -34,45 +34,26 @@ #define __DISABLE_AVX512IFMAVL__ #endif /* __AVX512IFMAVL__ */ -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, - (__v2di) __Y, - (__v2di) __Z, - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, - (__v4di) __Y, - (__v4di) __Z, - (__mmask8) -1); -} +#define _mm_madd52lo_epu64(A, B, C) \ + ((__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) (A), \ + (__v2di) (B), \ + (__v2di) (C))) + +#define _mm_madd52hi_epu64(A, B, C) \ + ((__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) (A), \ + (__v2di) (B), \ + (__v2di) (C))) + +#define _mm256_madd52lo_epu64(A, B, C) \ + ((__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) (A), \ + (__v4di) (B), \ + (__v4di) (C))) + + +#define _mm256_madd52hi_epu64(A, B, C) \ + ((__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) (A), \ + (__v4di) (B), \ + (__v4di) (C))) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avxifmaintrin.h b/gcc/config/i386/avxifmaintrin.h new file mode 100644 index 0000000..3878d10 --- /dev/null +++ b/gcc/config/i386/avxifmaintrin.h @@ -0,0 +1,78 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVXIFMAINTRIN_H_INCLUDED +#define _AVXIFMAINTRIN_H_INCLUDED + +#ifndef __AVXIFMA__ +#pragma GCC push_options +#pragma GCC target("avxifma") +#define __DISABLE_AVXIFMA__ +#endif /* __AVXIFMA__ */ + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +#ifdef __DISABLE_AVXIFMA__ +#undef __DISABLE_AVXIFMA__ +#pragma GCC pop_options +#endif /* __DISABLE_AVXIFMA__ */ + +#endif /* _AVXIFMAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index a4c2fed..9885699 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -28,6 +28,7 @@ #define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) #define bit_HRESET (1 << 22) +#define bit_AVXIFMA (1 << 23) /* %ecx */ #define bit_SSE3 (1 << 0) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index dea52a2..d22d79d 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2486,18 +2486,22 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builti BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) /* AVX512IFMA */ -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv4di, "__builtin_ia32_vpmadd52luq256", IX86_BUINTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv4di, "__builtin_ia32_vpmadd52huq256", IX86_BUINTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv2di, "__builtin_ia32_vpmadd52luq128", IX86_BUINTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) +BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUINTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) /* AVX512VBMI */ BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 76668cc..9412cf1 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -279,10 +279,12 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - /* "Unified" builtin used by either AVXVNNI intrinsics or AVX512VNNIVL - non-mask intrinsics should be defined whenever avxvnni - or avx512vnni && avx512vl exist. */ + /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics + or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be + defined whenever avxvnni/avxifma or avx512vnni/avxifma && + avx512vl exist. */ || (mask2 == OPTION_MASK_ISA2_AVXVNNI) + || (mask2 == OPTION_MASK_ISA2_AVXIFMA) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index eb0e3b3..3494ec0 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__WIDEKL__"); if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) def_or_undef (parse_in, "__AVXVNNI__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA) + def_or_undef (parse_in, "__AVXIFMA__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 70fd82b..0e8ba14 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12367,6 +12367,8 @@ ix86_check_builtin_isa_match (unsigned int fcode, OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or OPTION_MASK_ISA2_AVXVNNI + (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512IFMA) or + OPTION_MASK_ISA2_AVXIFMA where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */ @@ -12396,6 +12398,17 @@ ix86_check_builtin_isa_match (unsigned int fcode, isa2 |= OPTION_MASK_ISA2_AVXVNNI; } + if ((((bisa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + || (bisa2 & OPTION_MASK_ISA2_AVXIFMA) != 0) + && (((isa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) + || (isa2 & OPTION_MASK_ISA2_AVXIFMA) != 0)) + { + isa |= OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL; + isa2 |= OPTION_MASK_ISA2_AVXIFMA; + } + if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE /* __builtin_ia32_maskmovq requires MMX registers. */ && fcode != IX86_BUILTIN_MASKMOVQ) diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 83659d0..6e0254c 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -109,3 +109,4 @@ DEF_PTA(KL) DEF_PTA(WIDEKL) DEF_PTA(AVXVNNI) DEF_PTA(AVX512FP16) +DEF_PTA(AVXIFMA) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index acb2291..5facb64 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -226,7 +226,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mkl", OPTION_MASK_ISA2_KL }, { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, - { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA } }; static struct ix86_target_opts isa_opts[] = { @@ -1072,6 +1073,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("hreset", OPT_mhreset), IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), + IX86_ATTR_ISA ("avxifma", OPT_mavxifma), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6688d92..93538c5 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -835,7 +835,8 @@ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, - avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16" + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, + avx512ifmavl" (const_string "base")) ;; Define instruction set of MMX instructions @@ -891,6 +892,9 @@ (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") (eq_attr "isa" "avx512fp16") (symbol_ref "TARGET_AVX512FP16") + (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA") + (eq_attr "isa" "avx512ifmavl") + (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 0dbaacb..36e28b70 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1214,3 +1214,8 @@ Do not use GOT to access external symbols. -param=x86-stlf-window-ninsns= Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param Instructions number above which STFL stall penalty can be compensated. + +mavxifma +Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and +AVXIFMA built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 6afd78c..e9d4e97 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -44,6 +44,8 @@ #include <avxvnniintrin.h> +#include <avxifmaintrin.h> + #include <avx2intrin.h> #include <avx512fintrin.h> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 076064f..33f306a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -27867,7 +27867,7 @@ (define_int_attr vpmadd52type [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")]) -(define_expand "vpamdd52huq<mode>_maskz" +(define_expand "vpmadd52huq<mode>_maskz" [(match_operand:VI8_AVX512VL 0 "register_operand") (match_operand:VI8_AVX512VL 1 "register_operand") (match_operand:VI8_AVX512VL 2 "register_operand") @@ -27875,13 +27875,13 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512IFMA" { - emit_insn (gen_vpamdd52huq<mode>_maskz_1 ( + emit_insn (gen_vpmadd52huq<mode>_maskz_1 ( operands[0], operands[1], operands[2], operands[3], CONST0_RTX (<MODE>mode), operands[4])); DONE; }) -(define_expand "vpamdd52luq<mode>_maskz" +(define_expand "vpmadd52luq<mode>_maskz" [(match_operand:VI8_AVX512VL 0 "register_operand") (match_operand:VI8_AVX512VL 1 "register_operand") (match_operand:VI8_AVX512VL 2 "register_operand") @@ -27889,26 +27889,58 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512IFMA" { - emit_insn (gen_vpamdd52luq<mode>_maskz_1 ( + emit_insn (gen_vpmadd52luq<mode>_maskz_1 ( operands[0], operands[1], operands[2], operands[3], CONST0_RTX (<MODE>mode), operands[4])); DONE; }) -(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>" - [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") - (unspec:VI8_AVX512VL - [(match_operand:VI8_AVX512VL 1 "register_operand" "0") - (match_operand:VI8_AVX512VL 2 "register_operand" "v") - (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] +(define_insn "vpmadd52<vpmadd52type>v8di" + [(set (match_operand:V8DI 0 "register_operand" "=v") + (unspec:V8DI + [(match_operand:V8DI 1 "register_operand" "0") + (match_operand:V8DI 2 "register_operand" "v") + (match_operand:V8DI 3 "nonimmediate_operand" "vm")] VPMADD52))] "TARGET_AVX512IFMA" - "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}" + "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_insn "vpmadd52<vpmadd52type><mode>" + [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,v") + (unspec:VI8_AVX2 + [(match_operand:VI8_AVX2 1 "register_operand" "0,0") + (match_operand:VI8_AVX2 2 "register_operand" "x,v") + (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")] + VPMADD52))] + "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)" + "@ + %{vex%} vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3} + vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "isa" "avxifma,avx512ifmavl") + (set_attr "type" "ssemuladd") + (set_attr "prefix" "vex,evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "vpmadd52<vpmadd52type><mode>_maskz_1" + [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI8_AVX512VL + (unspec:VI8_AVX512VL + [(match_operand:VI8_AVX512VL 1 "register_operand" "0") + (match_operand:VI8_AVX512VL 2 "register_operand" "v") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] + VPMADD52) + (match_operand:VI8_AVX512VL 4 "const0_operand" "C") + (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] + "TARGET_AVX512IFMA" + "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vpamdd52<vpmadd52type><mode>_mask" +(define_insn "vpmadd52<vpmadd52type><mode>_mask" [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") (vec_merge:VI8_AVX512VL (unspec:VI8_AVX512VL |