aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorHongyu Wang <hongyu.wang@intel.com>2021-09-28 15:30:14 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2022-10-21 10:38:19 +0800
commit825d0041380378d978dfed6ea313e2ff9d2fce4c (patch)
tree722dabbb65a54d9642f43d4011d5ab27ca4294ec /gcc/config
parent47a6ae56584f5e3caaac74e158b77bf1c4fdd774 (diff)
downloadgcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.zip
gcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.tar.gz
gcc-825d0041380378d978dfed6ea313e2ff9d2fce4c.tar.bz2
Support Intel AVX-IFMA
gcc/ * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET, OPTION_MASK_ISA2_AVX2_UNSET): New macro. (ix86_handle_option): Handle -mavxifma. * common/config/i386/i386-cpuinfo.h (processor_types): Add FEATURE_AVXIFMA. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for avxifma. * common/config/i386/cpuinfo.h (get_available_features): Detect avxifma. * config.gcc: Add avxifmaintrin.h * config/i386/avx512ifmavlintrin.h: (_mm_madd52lo_epu64): Change to macro. (_mm_madd52hi_epu64): Likewise. (_mm256_madd52lo_epu64): Likewise. (_mm256_madd52hi_epu64): Likewise. * config/i386/avxifmaintrin.h: New header. * config/i386/cpuid.h (bit_AVXIFMA): New. * config/i386/i386-builtin.def: Add new builtins, and correct pattern names for AVX512IFMA. * config/i386/i386-builtins.cc (def_builtin): Handle AVX-IFMA builtins like AVX-VNNI. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AVXIFMA__. * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): Relax ISA masks for AVXIFMA. * config/i386/i386-isa.def: Add AVXIFMA. * config/i386/i386-options.cc (isa2_opts): Add -mavxifma. (ix86_valid_target_attribute_inner_p): Handle avxifma. * config/i386/i386.md (isa): Add attr avxifma and avxifmavl. * config/i386/i386.opt: Add option -mavxifma. * config/i386/immintrin.h: Inculde avxifmaintrin.h. * config/i386/sse.md (avx_vpmadd52<vpmadd52type>_<mode>): Remove. (vpamdd52<vpmadd52type><mode><sd_maskz_name>): Remove. (vpamdd52huq<mode>_maskz): Rename to ... (vpmadd52huq<mode>_maskz): ... this. (vpamdd52luq<mode>_maskz): Rename to ... (vpmadd52luq<mode>_maskz): ... this. (vpmadd52<vpmadd52type><mode>): New define_insn. (vpmadd52<vpmadd52type>v8di): Likewise. (vpmadd52<vpmadd52type><mode>_maskz_1): Likewise. (vpamdd52<vpmadd52type><mode>_mask): Rename to ... (vpmadd52<vpmadd52type><mode>_mask): ... this. * doc/invoke.texi: Document -mavxifma. * doc/extend.texi: Document avxifma. * doc/sourcebuild.texi: Document target avxifma. gcc/testsuite/ * gcc.target/i386/avx-check.h: Add avxifma check. * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Remane.. * gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this. * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test. * gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto. * gcc.target/i386/avx-ifma-1.c: Ditto. * gcc.target/i386/avx-ifma-2.c: Ditto. * gcc.target/i386/avx-ifma-3.c: Ditto. * gcc.target/i386/avx-ifma-4.c: Ditto. * gcc.target/i386/avx-ifma-5.c: Ditto. * gcc.target/i386/avx-ifma-6.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto. * gcc.target/i386/sse-12.c: Add -mavxifma. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avxifma): New.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/avx512ifmavlintrin.h59
-rw-r--r--gcc/config/i386/avxifmaintrin.h78
-rw-r--r--gcc/config/i386/cpuid.h1
-rw-r--r--gcc/config/i386/i386-builtin.def28
-rw-r--r--gcc/config/i386/i386-builtins.cc8
-rw-r--r--gcc/config/i386/i386-c.cc2
-rw-r--r--gcc/config/i386/i386-expand.cc13
-rw-r--r--gcc/config/i386/i386-isa.def1
-rw-r--r--gcc/config/i386/i386-options.cc4
-rw-r--r--gcc/config/i386/i386.md6
-rw-r--r--gcc/config/i386/i386.opt5
-rw-r--r--gcc/config/i386/immintrin.h2
-rw-r--r--gcc/config/i386/sse.md56
13 files changed, 195 insertions, 68 deletions
diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h
index a7a50d8..506dce8 100644
--- a/gcc/config/i386/avx512ifmavlintrin.h
+++ b/gcc/config/i386/avx512ifmavlintrin.h
@@ -34,45 +34,26 @@
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
-{
- return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
- (__v2di) __Y,
- (__v2di) __Z,
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
-{
- return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
- (__v2di) __Y,
- (__v2di) __Z,
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
-{
- return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
- (__v4di) __Y,
- (__v4di) __Z,
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
-{
- return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
- (__v4di) __Y,
- (__v4di) __Z,
- (__mmask8) -1);
-}
+#define _mm_madd52lo_epu64(A, B, C) \
+ ((__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) (A), \
+ (__v2di) (B), \
+ (__v2di) (C)))
+
+#define _mm_madd52hi_epu64(A, B, C) \
+ ((__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) (A), \
+ (__v2di) (B), \
+ (__v2di) (C)))
+
+#define _mm256_madd52lo_epu64(A, B, C) \
+ ((__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) (A), \
+ (__v4di) (B), \
+ (__v4di) (C)))
+
+
+#define _mm256_madd52hi_epu64(A, B, C) \
+ ((__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) (A), \
+ (__v4di) (B), \
+ (__v4di) (C)))
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avxifmaintrin.h b/gcc/config/i386/avxifmaintrin.h
new file mode 100644
index 0000000..3878d10
--- /dev/null
+++ b/gcc/config/i386/avxifmaintrin.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXIFMAINTRIN_H_INCLUDED
+#define _AVXIFMAINTRIN_H_INCLUDED
+
+#ifndef __AVXIFMA__
+#pragma GCC push_options
+#pragma GCC target("avxifma")
+#define __DISABLE_AVXIFMA__
+#endif /* __AVXIFMA__ */
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z);
+}
+
+#ifdef __DISABLE_AVXIFMA__
+#undef __DISABLE_AVXIFMA__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVXIFMA__ */
+
+#endif /* _AVXIFMAINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index a4c2fed..9885699 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -28,6 +28,7 @@
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_HRESET (1 << 22)
+#define bit_AVXIFMA (1 << 23)
/* %ecx */
#define bit_SSE3 (1 << 0)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index dea52a2..d22d79d 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2486,18 +2486,22 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builti
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
/* AVX512IFMA */
-BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv4di, "__builtin_ia32_vpmadd52luq256", IX86_BUINTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv4di, "__builtin_ia32_vpmadd52huq256", IX86_BUINTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52luqv2di, "__builtin_ia32_vpmadd52luq128", IX86_BUINTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUINTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
/* AVX512VBMI */
BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 76668cc..9412cf1 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -279,10 +279,12 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2,
if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
&& (mask == 0 || (mask & ix86_isa_flags) != 0))
|| ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- /* "Unified" builtin used by either AVXVNNI intrinsics or AVX512VNNIVL
- non-mask intrinsics should be defined whenever avxvnni
- or avx512vnni && avx512vl exist. */
+ /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics
+ or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be
+ defined whenever avxvnni/avxifma or avx512vnni/avxifma &&
+ avx512vl exist. */
|| (mask2 == OPTION_MASK_ISA2_AVXVNNI)
+ || (mask2 == OPTION_MASK_ISA2_AVXIFMA)
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
{
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index eb0e3b3..3494ec0 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__WIDEKL__");
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI)
def_or_undef (parse_in, "__AVXVNNI__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
+ def_or_undef (parse_in, "__AVXIFMA__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 70fd82b..0e8ba14 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12367,6 +12367,8 @@ ix86_check_builtin_isa_match (unsigned int fcode,
OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
(OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or
OPTION_MASK_ISA2_AVXVNNI
+ (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512IFMA) or
+ OPTION_MASK_ISA2_AVXIFMA
where for each such pair it is sufficient if either of the ISAs is
enabled, plus if it is ored with other options also those others.
OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */
@@ -12396,6 +12398,17 @@ ix86_check_builtin_isa_match (unsigned int fcode,
isa2 |= OPTION_MASK_ISA2_AVXVNNI;
}
+ if ((((bisa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
+ == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
+ || (bisa2 & OPTION_MASK_ISA2_AVXIFMA) != 0)
+ && (((isa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
+ == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
+ || (isa2 & OPTION_MASK_ISA2_AVXIFMA) != 0))
+ {
+ isa |= OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL;
+ isa2 |= OPTION_MASK_ISA2_AVXIFMA;
+ }
+
if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE
/* __builtin_ia32_maskmovq requires MMX registers. */
&& fcode != IX86_BUILTIN_MASKMOVQ)
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index 83659d0..6e0254c 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -109,3 +109,4 @@ DEF_PTA(KL)
DEF_PTA(WIDEKL)
DEF_PTA(AVXVNNI)
DEF_PTA(AVX512FP16)
+DEF_PTA(AVXIFMA)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index acb2291..5facb64 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -226,7 +226,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mkl", OPTION_MASK_ISA2_KL },
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
- { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }
+ { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
+ { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1072,6 +1073,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("hreset", OPT_mhreset),
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
+ IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6688d92..93538c5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -835,7 +835,8 @@
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
- avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16"
+ avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
+ avx512ifmavl"
(const_string "base"))
;; Define instruction set of MMX instructions
@@ -891,6 +892,9 @@
(symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
(eq_attr "isa" "avx512fp16")
(symbol_ref "TARGET_AVX512FP16")
+ (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
+ (eq_attr "isa" "avx512ifmavl")
+ (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 0dbaacb..36e28b70 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1214,3 +1214,8 @@ Do not use GOT to access external symbols.
-param=x86-stlf-window-ninsns=
Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param
Instructions number above which STFL stall penalty can be compensated.
+
+mavxifma
+Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
+AVXIFMA built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 6afd78c..e9d4e97 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -44,6 +44,8 @@
#include <avxvnniintrin.h>
+#include <avxifmaintrin.h>
+
#include <avx2intrin.h>
#include <avx512fintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 076064f..33f306a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -27867,7 +27867,7 @@
(define_int_attr vpmadd52type
[(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
-(define_expand "vpamdd52huq<mode>_maskz"
+(define_expand "vpmadd52huq<mode>_maskz"
[(match_operand:VI8_AVX512VL 0 "register_operand")
(match_operand:VI8_AVX512VL 1 "register_operand")
(match_operand:VI8_AVX512VL 2 "register_operand")
@@ -27875,13 +27875,13 @@
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512IFMA"
{
- emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
+ emit_insn (gen_vpmadd52huq<mode>_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_expand "vpamdd52luq<mode>_maskz"
+(define_expand "vpmadd52luq<mode>_maskz"
[(match_operand:VI8_AVX512VL 0 "register_operand")
(match_operand:VI8_AVX512VL 1 "register_operand")
(match_operand:VI8_AVX512VL 2 "register_operand")
@@ -27889,26 +27889,58 @@
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512IFMA"
{
- emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
+ emit_insn (gen_vpmadd52luq<mode>_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
- [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
- (unspec:VI8_AVX512VL
- [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
- (match_operand:VI8_AVX512VL 2 "register_operand" "v")
- (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
+(define_insn "vpmadd52<vpmadd52type>v8di"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (unspec:V8DI
+ [(match_operand:V8DI 1 "register_operand" "0")
+ (match_operand:V8DI 2 "register_operand" "v")
+ (match_operand:V8DI 3 "nonimmediate_operand" "vm")]
VPMADD52))]
"TARGET_AVX512IFMA"
- "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
+ "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "vpmadd52<vpmadd52type><mode>"
+ [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,v")
+ (unspec:VI8_AVX2
+ [(match_operand:VI8_AVX2 1 "register_operand" "0,0")
+ (match_operand:VI8_AVX2 2 "register_operand" "x,v")
+ (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")]
+ VPMADD52))]
+ "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)"
+ "@
+ %{vex%} vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}
+ vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "isa" "avxifma,avx512ifmavl")
+ (set_attr "type" "ssemuladd")
+ (set_attr "prefix" "vex,evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vpmadd52<vpmadd52type><mode>_maskz_1"
+ [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI8_AVX512VL
+ (unspec:VI8_AVX512VL
+ [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI8_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
+ VPMADD52)
+ (match_operand:VI8_AVX512VL 4 "const0_operand" "C")
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
+ "TARGET_AVX512IFMA"
+ "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vpamdd52<vpmadd52type><mode>_mask"
+(define_insn "vpmadd52<vpmadd52type><mode>_mask"
[(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI8_AVX512VL
(unspec:VI8_AVX512VL