From 406675947d26ccbc2108e9689a2918bb36f61a63 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 29 Sep 2021 09:48:20 +0800 Subject: Support Intel AVX-VNNI-INT8 gcc/ChangeLog * common/config/i386/cpuinfo.h (get_available_features): Detect avxvnniint8. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New. (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto. (ix86_handle_option): Handle -mavxvnniint8. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVXVNNIINT8. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for avxvnniint8. * config.gcc: Add avxvnniint8intrin.h. * config/i386/avxvnniint8intrin.h: New file. * config/i386/cpuid.h (bit_AVXVNNIINT8): New. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AVXVNNIINT8__. * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8. (ix86_valid_target_attribute_inner_p): Handle avxvnniint8. * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New.. * config/i386/i386.opt: Add option -mavxvnniint8. * config/i386/immintrin.h: Include avxvnniint8intrin.h. * config/i386/sse.md (UNSPEC_VPMADDUBSWACCD UNSPEC_VPMADDUBSWACCSSD,UNSPEC_VPMADDWDACCD, UNSPEC_VPMADDWDACCSSD): Rename according to new style. (vpdp_): New define_insn. * doc/extend.texi: Document avxvnniint8. * doc/invoke.texi: Document -mavxvnniint8. * doc/sourcebuild.texi: Document target avxvnniint8. gcc/testsuite/ChangeLog * g++.dg/other/i386-2.C: Add -mavxvnniint8. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/avx-check.h: Add avxvnniint8 check. * gcc.target/i386/sse-12.c: Add -mavxvnniint8. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avxvnniint8): New. * gcc.target/i386/avxvnniint8-1.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto. Co-authored-by: Hongyu Wang Co-authored-by: Haochen Jiang --- gcc/config/i386/avxvnniint8intrin.h | 138 ++++++++++++++++++++++++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin.def | 14 ++++ gcc/config/i386/i386-c.cc | 2 + gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.cc | 4 +- gcc/config/i386/i386.opt | 5 ++ gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 71 +++++++++++++------ 9 files changed, 217 insertions(+), 21 deletions(-) create mode 100644 gcc/config/i386/avxvnniint8intrin.h (limited to 'gcc/config/i386') diff --git a/gcc/config/i386/avxvnniint8intrin.h b/gcc/config/i386/avxvnniint8intrin.h new file mode 100644 index 0000000..362e6f6 --- /dev/null +++ b/gcc/config/i386/avxvnniint8intrin.h @@ -0,0 +1,138 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED +#define _AVXVNNIINT8INTRIN_H_INCLUDED + +#if !defined(__AVXVNNIINT8__) +#pragma GCC push_options +#pragma GCC target("avxvnniint8") +#define __DISABLE_AVXVNNIINT8__ +#endif /* __AVXVNNIINT8__ */ + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B) +{ + return (__m128i) + __builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B) +{ + return (__m256i) + __builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B); +} + +#ifdef __DISABLE_AVXVNNIINT8__ +#undef __DISABLE_AVXVNNIINT8__ +#pragma GCC pop_options +#endif /* __DISABLE_AVXVNNIINT8__ */ + +#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 9885699..f5fad22 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -49,6 +49,7 @@ #define bit_RDRND (1 << 30) /* %edx */ +#define bit_AVXVNNIINT8 (1 << 4) #define bit_CMPXCHG8B (1 << 8) #define bit_CMOV (1 << 15) #define bit_MMX (1 << 23) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index d22d79d..e35306e 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2694,6 +2694,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) +/* AVXVNNIINT8 */ +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) + /* VPCLMULQDQ */ BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 3494ec0..a9a35c0 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVXVNNI__"); if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA) def_or_undef (parse_in, "__AVXIFMA__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8) + def_or_undef (parse_in, "__AVXVNNIINT8__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 6e0254c..c95b917 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -110,3 +110,4 @@ DEF_PTA(WIDEKL) DEF_PTA(AVXVNNI) DEF_PTA(AVX512FP16) DEF_PTA(AVXIFMA) +DEF_PTA(AVXVNNIINT8) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 5facb64..3e6d044 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, - { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA } + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }, + { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 } }; static struct ix86_target_opts isa_opts[] = { @@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), IX86_ATTR_ISA ("avxifma", OPT_mavxifma), + IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 36e28b70..53d534f 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1219,3 +1219,8 @@ mavxifma Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and AVXIFMA built-in functions and code generation. + +mavxvnniint8 +Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and +AVXVNNIINT8 built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index e9d4e97..ddea249 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -46,6 +46,8 @@ #include +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 33f306a..014b0b2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -166,10 +166,10 @@ UNSPEC_VPSHLDV ;; For AVX512VNNI support - UNSPEC_VPMADDUBSWACCD - UNSPEC_VPMADDUBSWACCSSD - UNSPEC_VPMADDWDACCD - UNSPEC_VPMADDWDACCSSD + UNSPEC_VPDPBUSD + UNSPEC_VPDPBUSDS + UNSPEC_VPDPWSSD + UNSPEC_VPDPWSSDS ;; For VAES support UNSPEC_VAESDEC @@ -200,6 +200,13 @@ UNSPEC_COMPLEX_FCMUL UNSPEC_COMPLEX_MASK + ;; For AVX-VNNI-INT8 support + UNSPEC_VPDPBSSD + UNSPEC_VPDPBSSDS + UNSPEC_VPDPBSUD + UNSPEC_VPDPBSUDS + UNSPEC_VPDPBUUD + UNSPEC_VPDPBUUDS ]) (define_c_enum "unspecv" [ @@ -28535,7 +28542,7 @@ [(match_operand:V16SI 1 "register_operand" "0") (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDUBSWACCD))] + UNSPEC_VPDPBUSD))] "TARGET_AVX512VNNI" "vpdpbusd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -28546,7 +28553,7 @@ [(match_operand:VI4_AVX2 1 "register_operand" "0,0") (match_operand:VI4_AVX2 2 "register_operand" "x,v") (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] - UNSPEC_VPMADDUBSWACCD))] + UNSPEC_VPDPBUSD))] "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" "@ %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3} @@ -28561,7 +28568,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDUBSWACCD) + UNSPEC_VPDPBUSD) (match_dup 1) (match_operand: 4 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28590,7 +28597,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm") - ] UNSPEC_VPMADDUBSWACCD) + ] UNSPEC_VPDPBUSD) (match_operand:VI4_AVX512VL 4 "const0_operand") (match_operand: 5 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28603,7 +28610,7 @@ [(match_operand:V16SI 1 "register_operand" "0") (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDUBSWACCSSD))] + UNSPEC_VPDPBUSDS))] "TARGET_AVX512VNNI" "vpdpbusds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -28614,7 +28621,7 @@ [(match_operand:VI4_AVX2 1 "register_operand" "0,0") (match_operand:VI4_AVX2 2 "register_operand" "x,v") (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] - UNSPEC_VPMADDUBSWACCSSD))] + UNSPEC_VPDPBUSDS))] "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" "@ %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3} @@ -28629,7 +28636,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDUBSWACCSSD) + UNSPEC_VPDPBUSDS) (match_dup 1) (match_operand: 4 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28658,7 +28665,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDUBSWACCSSD) + UNSPEC_VPDPBUSDS) (match_operand:VI4_AVX512VL 4 "const0_operand") (match_operand: 5 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28671,7 +28678,7 @@ [(match_operand:V16SI 1 "register_operand" "0") (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCD))] + UNSPEC_VPDPWSSD))] "TARGET_AVX512VNNI" "vpdpwssd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -28682,7 +28689,7 @@ [(match_operand:VI4_AVX2 1 "register_operand" "0,0") (match_operand:VI4_AVX2 2 "register_operand" "x,v") (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] - UNSPEC_VPMADDWDACCD))] + UNSPEC_VPDPWSSD))] "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" "@ %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3} @@ -28697,7 +28704,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCD) + UNSPEC_VPDPWSSD) (match_dup 1) (match_operand: 4 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28726,7 +28733,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCD) + UNSPEC_VPDPWSSD) (match_operand:VI4_AVX512VL 4 "const0_operand") (match_operand: 5 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28739,7 +28746,7 @@ [(match_operand:V16SI 1 "register_operand" "0") (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCSSD))] + UNSPEC_VPDPWSSDS))] "TARGET_AVX512VNNI" "vpdpwssds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -28750,7 +28757,7 @@ [(match_operand:VI4_AVX2 1 "register_operand" "0,0") (match_operand:VI4_AVX2 2 "register_operand" "x,v") (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] - UNSPEC_VPMADDWDACCSSD))] + UNSPEC_VPDPWSSDS))] "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" "@ %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3} @@ -28765,7 +28772,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCSSD) + UNSPEC_VPDPWSSDS) (match_dup 1) (match_operand: 4 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -28794,7 +28801,7 @@ [(match_operand:VI4_AVX512VL 1 "register_operand" "0") (match_operand:VI4_AVX512VL 2 "register_operand" "v") (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] - UNSPEC_VPMADDWDACCSSD) + UNSPEC_VPDPWSSDS) (match_operand:VI4_AVX512VL 4 "const0_operand") (match_operand: 5 "register_operand" "Yk")))] "TARGET_AVX512VNNI" @@ -29235,3 +29242,27 @@ gcc_unreachable (); DONE; }) + +(define_int_iterator VPDOTPROD + [UNSPEC_VPDPBSSD + UNSPEC_VPDPBSSDS + UNSPEC_VPDPBSUD + UNSPEC_VPDPBSUDS + UNSPEC_VPDPBUUD + UNSPEC_VPDPBUUDS]) + +(define_int_attr vpdotprodtype + [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds") + (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds") + (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")]) + +(define_insn "vpdp_" + [(set (match_operand:VI4_AVX 0 "register_operand" "=x") + (unspec:VI4_AVX + [(match_operand:VI4_AVX 1 "register_operand" "0") + (match_operand:VI4_AVX 2 "register_operand" "x") + (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")] + VPDOTPROD))] + "TARGET_AVXVNNIINT8" + "vpdp\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "prefix" "vex")]) -- cgit v1.1