diff options
Diffstat (limited to 'gcc')
33 files changed, 733 insertions, 19 deletions
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index e3eb6e9..67724c3 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -995,6 +995,17 @@ get_available_features (struct __processor_model *cpu_model, } } + /* Get Advanced Features at level 0x1e (eax = 0x1e, ecx = 1). */ + if (max_cpuid_level >= 0x1e) + { + __cpuid_count (0x1e, 1, eax, ebx, ecx, edx); + if (amx_usable) + { + if (eax & bit_AMX_AVX512) + set_feature (FEATURE_AMX_AVX512); + } + } + /* Get Advanced Features at level 0x24 (eax = 0x24, ecx = 0). */ if (avx10_set && max_cpuid_level >= 0x24) { diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 4a213f5..e8e3eb1 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -127,6 +127,9 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX10_2_512_SET \ (OPTION_MASK_ISA2_AVX10_1_512_SET | OPTION_MASK_ISA2_AVX10_2_256_SET \ | OPTION_MASK_ISA2_AVX10_2_512) +#define OPTION_MASK_ISA2_AMX_AVX512_SET \ + (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AVX10_2_512_SET \ + | OPTION_MASK_ISA2_AMX_AVX512) /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */ @@ -289,7 +292,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AMX_TILE_UNSET \ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_INT8_UNSET \ | OPTION_MASK_ISA2_AMX_BF16_UNSET | OPTION_MASK_ISA2_AMX_FP16_UNSET \ - | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET) + | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET | OPTION_MASK_ISA2_AMX_AVX512_UNSET) #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8 #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16 #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR @@ -317,7 +320,9 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX10_1_512_UNSET \ (OPTION_MASK_ISA2_AVX10_1_512 | OPTION_MASK_ISA2_AVX10_2_512_UNSET) #define OPTION_MASK_ISA2_AVX10_2_256_UNSET OPTION_MASK_ISA2_AVX10_2_256 -#define OPTION_MASK_ISA2_AVX10_2_512_UNSET OPTION_MASK_ISA2_AVX10_2_512 +#define OPTION_MASK_ISA2_AVX10_2_512_UNSET \ + (OPTION_MASK_ISA2_AVX10_2_512 | OPTION_MASK_ISA2_AMX_AVX512_UNSET) +#define OPTION_MASK_ISA2_AMX_AVX512_UNSET OPTION_MASK_ISA2_AMX_AVX512 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -1409,6 +1414,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mamx_avx512: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_AVX512_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_AVX512_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_AVX512_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_AVX512_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index b573166..cc5bb0d 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -269,6 +269,7 @@ enum processor_features FEATURE_AVX10_1_512, FEATURE_AVX10_2_256, FEATURE_AVX10_2_512, + FEATURE_AMX_AVX512, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index a7c7e63..7ea852a 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -189,4 +189,6 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("avx10.2", FEATURE_AVX10_2_256, P_NONE, "-mavx10.2") ISA_NAMES_TABLE_ENTRY("avx10.2-256", FEATURE_AVX10_2_256, P_NONE, "-mavx10.2-256") ISA_NAMES_TABLE_ENTRY("avx10.2-512", FEATURE_AVX10_2_512, P_NONE, "-mavx10.2-512") + ISA_NAMES_TABLE_ENTRY("amx-avx512", FEATURE_AMX_AVX512, P_NONE, + "-mamx-avx512") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index c3531e5..5d0240e 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -458,7 +458,7 @@ i[34567]86-*-* | x86_64-*-*) avx10_2bf16intrin.h avx10_2-512bf16intrin.h avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h - avx10_2copyintrin.h" + avx10_2copyintrin.h amxavx512intrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h new file mode 100644 index 0000000..146a981 --- /dev/null +++ b/gcc/config/i386/amxavx512intrin.h @@ -0,0 +1,189 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AMXAVX512INTRIN_H_INCLUDED +#define _AMXAVX512INTRIN_H_INCLUDED + +#if !defined(__AMX_AVX512__) +#pragma GCC push_options +#pragma GCC target("amx-avx512") +#define __DISABLE_AMX_AVX512__ +#endif /* __AMX_AVX512__ */ + +#if defined(__x86_64__) +#define _tile_cvtrowd2ps_internal(src,A) \ +({ \ + __m512 dst; \ + __asm__ volatile \ + ("{tcvtrowd2ps\t%1, %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_cvtrowd2psi_internal(src,imm) \ +({ \ + __m512 dst; \ + __asm__ volatile \ + ("{tcvtrowd2ps\t$"#imm", %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_cvtrowps2pbf16h_internal(src,A) \ +({ \ + __m512bh dst; \ + __asm__ volatile \ + ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_cvtrowps2pbf16hi_internal(src,imm) \ +({ \ + __m512bh dst; \ + __asm__ volatile \ + ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_cvtrowps2pbf16l_internal(src,A) \ +({ \ + __m512bh dst; \ + __asm__ volatile \ + ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_cvtrowps2pbf16li_internal(src,imm) \ +({ \ + __m512bh dst; \ + __asm__ volatile \ + ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_cvtrowps2phh_internal(src,A) \ +({ \ + __m512h dst; \ + __asm__ volatile \ + ("{tcvtrowps2phh\t%1, %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_cvtrowps2phhi_internal(src,imm) \ +({ \ + __m512h dst; \ + __asm__ volatile \ + ("{tcvtrowps2phh\t$"#imm", %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_cvtrowps2phl_internal(src,A) \ +({ \ + __m512h dst; \ + __asm__ volatile \ + ("{tcvtrowps2phl\t%1, %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_cvtrowps2phli_internal(src,imm) \ +({ \ + __m512h dst; \ + __asm__ volatile \ + ("{tcvtrowps2phl\t$"#imm", %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_movrow_internal(src,A) \ +({ \ + __m512 dst; \ + __asm__ volatile \ + ("{tilemovrow\t%1, %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", %1}" \ + : "=v" (dst) : "r" ((unsigned) (A))); \ + dst; \ +}) + +#define _tile_movrowi_internal(src,imm) \ +({ \ + __m512 dst; \ + __asm__ volatile \ + ("{tilemovrow\t$"#imm", %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", "#imm"}" \ + : "=v" (dst) :); \ + dst; \ +}) + +#define _tile_cvtrowd2ps(src,A) \ + _tile_cvtrowd2ps_internal (src,A) + +#define _tile_cvtrowd2psi(src,imm) \ + _tile_cvtrowd2psi_internal (src,imm) + +#define _tile_cvtrowps2pbf16h(src,A) \ + _tile_cvtrowps2pbf16h_internal (src,A) + +#define _tile_cvtrowps2pbf16hi(src,imm) \ + _tile_cvtrowps2pbf16hi_internal (src,imm) + +#define _tile_cvtrowps2pbf16l(src,A) \ + _tile_cvtrowps2pbf16l_internal (src,A) + +#define _tile_cvtrowps2pbf16li(src,imm) \ + _tile_cvtrowps2pbf16li_internal (src,imm) + +#define _tile_cvtrowps2phh(src,A) \ + _tile_cvtrowps2phh_internal (src,A) + +#define _tile_cvtrowps2phhi(src,imm) \ + _tile_cvtrowps2phhi_internal (src,imm) + +#define _tile_cvtrowps2phl(src,A) \ + _tile_cvtrowps2phl_internal (src,A) + +#define _tile_cvtrowps2phli(src,imm) \ + _tile_cvtrowps2phli_internal (src,imm) + +#define _tile_movrow(src,A) \ + _tile_movrow_internal (src,A) + +#define _tile_movrowi(src,imm) \ + _tile_movrowi_internal (src,imm) + +#endif + +#ifdef __DISABLE_AMX_AVX512__ +#undef __DISABLE_AMX_AVX512__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_AVX512__ */ + +#endif /* _AMXAVX512INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index a75ba2b..2fc163b 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -162,6 +162,10 @@ #define bit_AESKLE ( 1<<0 ) #define bit_WIDEKL ( 1<<2 ) +/* AMX sub leaf (%eax == 0x1e, %ecx == 1) */ +/* %eax */ +#define bit_AMX_AVX512 (1 << 7) + /* AVX10 sub leaf (%eax == 0x24) */ /* %ebx */ #define bit_AVX10_256 (1 << 17) diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 72435fe..1c36beb 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -741,6 +741,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVX10_2_256__"); if (isa_flag2 & OPTION_MASK_ISA2_AVX10_2_512) def_or_undef (parse_in, "__AVX10_2_512__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_AVX512) + def_or_undef (parse_in, "__AMX_AVX512__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index bfb33ba..fcc3bc4 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -123,3 +123,4 @@ DEF_PTA(AVX10_1_256) DEF_PTA(AVX10_1_512) DEF_PTA(AVX10_2_256) DEF_PTA(AVX10_2_512) +DEF_PTA(AMX_AVX512) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 38037de..2f6646f 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -263,7 +263,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 }, { "-mavx10.1-512", OPTION_MASK_ISA2_AVX10_1_512 }, { "-mavx10.2-256", OPTION_MASK_ISA2_AVX10_2_256 }, - { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 } + { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 }, + { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 } }; static struct ix86_target_opts isa_opts[] = { @@ -1132,6 +1133,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2_256), IX86_ATTR_ISA ("avx10.2-256", OPT_mavx10_2_256), IX86_ATTR_ISA ("avx10.2-512", OPT_mavx10_2_512), + IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 64c295d..232daff 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1389,3 +1389,8 @@ mavx10.2 Target Alias(mavx10.2-256) Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1 and AVX10.2 built-in functions and code generation. + +mamx-avx512 +Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512, +AVX10.2-512 and AMX-AVX512 built-in functions and code generation. diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index fc70616..9f590f6 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -613,3 +613,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2-512) mavx10.2 UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2) +mamx-avx512 +UrlSuffix(gcc/x86-Options.html#index-mamx-avx512) + diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 6b8035e..772af56 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -132,6 +132,8 @@ #include <amxcomplexintrin.h> +#include <amxavx512intrin.h> + #include <prfchwintrin.h> #include <keylockerintrin.h> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index f97e008..d2b3086 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7544,6 +7544,11 @@ Enable/disbale the generation of the AVX10.2 instructions. @itemx no-avx10.2-512 Enable/disable the generation of the AVX10.2 512 bit instructions. +@cindex @code{target("amx-avx512")} function attribute, x86 +@item amx-avx512 +@itemx no-amx-avx512 +Enable/disable the generation of the AMX-AVX512 instructions. + @cindex @code{target("cld")} function attribute, x86 @item cld @itemx no-cld diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 19c148a..1186bdd 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1486,7 +1486,7 @@ See RS/6000 and PowerPC Options. -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf -musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256 --mavx10.2-512 +-mavx10.2-512 -mamx-avx512 -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops -minline-stringops-dynamically -mstringop-strategy=@var{alg} -mkl -mwidekl @@ -35674,6 +35674,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @opindex mavx10.2-512 @itemx -mavx10.2-512 +@need 200 +@opindex mamx-avx512 +@itemx -mamx-avx512 These switches enable the use of instructions in the MMX, SSE, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG, @@ -35684,9 +35687,9 @@ WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX512VNNI, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, -SM4, APX_F, USER_MSR, AVX10.1, AVX10.2 or CLDEMOTE extended instruction sets. -Each has a corresponding @option{-mno-} option to disable use of these -instructions. +SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512 or CLDEMOTE extended +instruction sets. Each has a corresponding @option{-mno-} option to disable +use of these instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 38275fd..0dfbc57 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2644,6 +2644,9 @@ Target supports the execution of @code{amx-int8} instructions. @item amx_bf16 Target supports the execution of @code{amx-bf16} instructions. +@item amx_avx512 +Target supports the execution of @code{amx-avx512} instructions. + @item amx_complex Target supports the execution of @code{amx-complex} instructions. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index d0492dc..8e872f7 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 3bfc839..133e64f 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h index f1a04cf..a336392 100644 --- a/gcc/testsuite/gcc.target/i386/amx-check.h +++ b/gcc/testsuite/gcc.target/i386/amx-check.h @@ -219,6 +219,9 @@ main () #ifdef AMX_COMPLEX && __builtin_cpu_supports ("amx-complex") #endif +#ifdef AMX_AVX512 + && __builtin_cpu_supports ("amx-avx512") +#endif #ifdef __linux__ && request_perm_xtile_data () #endif diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h index 6ed9f5e..847882d 100644 --- a/gcc/testsuite/gcc.target/i386/amx-helper.h +++ b/gcc/testsuite/gcc.target/i386/amx-helper.h @@ -1,9 +1,7 @@ #ifndef AMX_HELPER_H_INCLUDED #define AMX_HELPER_H_INCLUDED -#if defined(AMX_FP16) || defined(AMX_COMPLEX) #include <immintrin.h> #include <xmmintrin.h> -#endif #include "amx-check.h" typedef union @@ -12,7 +10,25 @@ typedef union uint16_t u; } union16f_uw; -#if defined(AMX_FP16) || defined(AMX_COMPLEX) +typedef union +{ + __bf16 bf16; + uint16_t u; +} union16bh_uw; + +typedef union +{ + float f; + uint32_t u; +} union32f_ud; + +typedef union +{ + __m512 m; + uint8_t u[64]; +} union512_ub; + +#if defined(AMX_FP16) || defined(AMX_COMPLEX) || defined (AMX_AVX512) /* Transformation functions between fp16/float */ static uint16_t make_f32_fp16 (float f) { @@ -58,4 +74,87 @@ void init_fp16_max_tile_zero_buffer (uint8_t* buf) } #endif +#if defined (AMX_AVX512) +/* Transformation functions between bf16/float */ +static uint16_t make_f32_bf16 (float f) +{ + union16bh_uw tmp; + tmp.bf16 = (__bf16) f; + return tmp.u; +} + +static float make_bf16_f32 (uint16_t bf) +{ + union16bh_uw tmp; + tmp.u = bf; + return _mm_cvtsbh_ss (tmp.bf16); +} + +/* Init tile buffer with bf16 pairs */ +void init_bf16_max_tile_buffer (uint8_t *buf) +{ + int i, j; + uint16_t* ptr = (uint16_t *) buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 32; j++) + { + float f = 2.5f * i + 1.25f * j; + ptr[i * 32 + j] = make_f32_bf16 (f); + } +} +#endif + +/* Init tile buffer with fp32 */ +void init_fp32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + float* ptr = (float *) buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2.5f * i + 1.25f * j; +} + +/* Init tile buffer with int32 */ +void init_int32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + uint32_t *ptr = (uint32_t *)buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = (uint32_t) (3 * j - 16 * i); +} + +#define COMPARE_ZMM(A, B) \ +for (int j = 0; j < 16; j++) \ +{ \ + union32f_ud fu1, fu2; \ + fu1.f = A[j]; \ + fu2.f = B[j]; \ + if (fu1.u != fu2.u) \ + abort (); \ +} + +#define COMPARE_ZMM_BF16(A, B) \ +for (int j = 0; j < 32; j++) \ +{ \ + union16bh_uw fu1, fu2; \ + fu1.bf16 = A[j]; \ + fu2.bf16 = B[j]; \ + if (fu1.u != fu2.u) \ + abort(); \ +} + +#define COMPARE_ZMM_FP16(A, B) \ +for (int j = 0; j < 32; j++) \ +{ \ + union16f_uw fu1, fu2; \ + fu1.f16 = A[j]; \ + fu2.f16 = B[j]; \ + if (fu1.u != fu2.u) \ + abort(); \ +} + #endif diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c new file mode 100644 index 0000000..497218d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */ +#include <immintrin.h> + +#define TMM1 1 + +__m512 a; +__m512bh b; +__m512h c; + +void TEST () +{ + a = _tile_cvtrowd2ps (TMM1, 1); + a = _tile_cvtrowd2psi (TMM1, 2); + b = _tile_cvtrowps2pbf16h (TMM1, 3); + b = _tile_cvtrowps2pbf16hi (TMM1, 4); + b = _tile_cvtrowps2pbf16l (TMM1, 5); + b = _tile_cvtrowps2pbf16li (TMM1, 6); + c = _tile_cvtrowps2phh (TMM1, 7); + c = _tile_cvtrowps2phhi (TMM1, 8); + c = _tile_cvtrowps2phl (TMM1, 9); + c = _tile_cvtrowps2phli (TMM1, 10); + a = _tile_movrow (TMM1, 11); + a = _tile_movrowi (TMM1, 12); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c new file mode 100644 index 0000000..4011043 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */ +/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +#include <immintrin.h> + +__m512 a; +__m512bh b; +__m512h c; + +void TEST () +{ + a = _tile_cvtrowd2ps (1, 1); + a = _tile_cvtrowd2psi (1, 2); + b = _tile_cvtrowps2pbf16h (1, 3); + b = _tile_cvtrowps2pbf16hi (1, 4); + b = _tile_cvtrowps2pbf16l (1, 5); + b = _tile_cvtrowps2pbf16li (1, 6); + c = _tile_cvtrowps2phh (1, 7); + c = _tile_cvtrowps2phhi (1, 8); + c = _tile_cvtrowps2phl (1, 9); + c = _tile_cvtrowps2phli (1, 10); + a = _tile_movrow (1, 11); + a = _tile_movrowi (1, 12); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c new file mode 100644 index 0000000..cfd5644 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c @@ -0,0 +1,62 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowd2ps +void test_amx_avx512_cvtrowd2ps(); +#include "amx-helper.h" + +volatile __m512 cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWD2PS(EI, T) \ +__m512 \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowd2ps##EI (__tile *src, T __A) \ +{ \ + uint32_t *src_buf = (uint32_t *)src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, j; \ + __m512 res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + res[j] = 0; \ + else \ + res[j] = (float) (int) src_buf[row_index * N + j + row_chunk / 4]; \ + return res; \ +} + +DEFINE_TEST_CVTROWD2PS(e, unsigned) +DEFINE_TEST_CVTROWD2PS(i, const unsigned) + +#define TEST_CVTROWD2PS(X, Y, EI, T, INTRIN) \ +cal_dst = calc_cvtrowd2ps##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowd2ps() +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_int32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWD2PS (&src, a, e, unsigned, cvtrowd2ps); + TEST_CVTROWD2PS (&src, 1, i, const unsigned, cvtrowd2psi); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c new file mode 100644 index 0000000..dfd1d6a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c @@ -0,0 +1,82 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowps2pbf16 +void test_amx_avx512_cvtrowps2pbf16(); +#include "amx-helper.h" + +volatile __m512bh cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \ +__m512bh \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \ +{ \ + float *src_buf = (float *) src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, zeropos, pos, j, k; \ + __m512bh res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + if ((#HL) == "h") \ + { \ + zeropos = 0; \ + pos = 1; \ + } \ + else \ + { \ + zeropos = 1; \ + pos = 0; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + for (k = 0; k < 2; k++) \ + res[2 * j + k] = 0; \ + else \ + { \ + union16bh_uw tmp; \ + tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \ + res[2 * j + pos] = tmp.bf16; \ + res[2 * j + zeropos] = (__bf16) 0; \ + } \ + return res; \ +} + +DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned) +DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned) +DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned) + +#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM_BF16(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowps2pbf16 () +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_fp32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h); + TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l); + TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi); + TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c new file mode 100644 index 0000000..1fd28de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c @@ -0,0 +1,82 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowps2ph +void test_amx_avx512_cvtrowps2ph(); +#include "amx-helper.h" + +volatile __m512h cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWPS2PH(HL, EI, T) \ +__m512h \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowps2ph##HL##EI (__tile *src, T __A) \ +{ \ + float *src_buf = (float *) src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, zeropos, pos, j, k; \ + __m512h res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + if ((#HL) == "h") \ + { \ + zeropos = 0; \ + pos = 1; \ + } \ + else \ + { \ + zeropos = 1; \ + pos = 0; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + for (k = 0; k < 2; k++) \ + res[2 * j + k] = 0; \ + else \ + { \ + union16f_uw tmp; \ + tmp.u = make_f32_fp16 (src_buf[row_index * N + j + row_chunk / 4]); \ + res[2 * j + zeropos] = 0; \ + res[2 * j + pos] = tmp.f16; \ + } \ + return res; \ +} + +DEFINE_TEST_CVTROWPS2PH(h, e, unsigned) +DEFINE_TEST_CVTROWPS2PH(l, e, unsigned) +DEFINE_TEST_CVTROWPS2PH(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2PH(l, i, const unsigned) + +#define TEST_CVTROWPS2PH(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2ph##HL##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM_FP16(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowps2ph () +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_fp32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWPS2PH (&src, a, h, e, unsigned, cvtrowps2phh); + TEST_CVTROWPS2PH (&src, a, l, e, unsigned, cvtrowps2phl); + TEST_CVTROWPS2PH (&src, 1, h, i, const unsigned, cvtrowps2phhi); + TEST_CVTROWPS2PH (&src, 1, l, i, const unsigned, cvtrowps2phli); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c new file mode 100644 index 0000000..ea28d82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c @@ -0,0 +1,59 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_movrow +void test_amx_avx512_movrow(); +#include "amx-helper.h" + +int j, k; +volatile __m512 cal_dst, cmp_dst; + +#define TEST_MOVROW(X, Y, EI, T, INTRIN) \ +__m512 \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_movrow##EI (__tile *src, T __A) \ +{ \ + uint8_t *src_buf = (uint8_t *)src->buf; \ + int N = src->colsb; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk; \ + __m512 res; \ + if ((EI) == 'e') \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + union512_ub tmp; \ + for (j = 0; j < vl_bytes; j++) \ + if (j + row_chunk >= N) \ + tmp.u[j] = 0; \ + else \ + tmp.u[j] = src_buf[row_index * N + j + row_chunk]; \ + res = tmp.m; \ + return res; \ +} \ +cal_dst = calc_movrow##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM(cal_dst, cmp_dst); + +void test_amx_avx512_movrow() +{ + __tilecfg_u cfg; + __tile src; + unsigned a = 2; + char e = 'e', i = 'i'; + + init_tile_config (&cfg); + init_tile_reg_and_src (1, src); + + TEST_MOVROW (&src, a, e, unsigned, movrow); + TEST_MOVROW (&src, 1, i, const unsigned, movrowi); + +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 0852e53..b4ffc5f 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -89,6 +89,7 @@ extern void test_sm4 (void) __attribute__((__target__("sm4") extern void test_user_msr (void) __attribute__((__target__("usermsr"))); extern void test_avx10_2 (void) __attribute__((__target__("avx10.2"))); extern void test_avx10_2_512 (void) __attribute__((__target__("avx10.2-512"))); +extern void test_amx_avx512 (void) __attribute__((__target__("amx-avx512"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); @@ -179,6 +180,7 @@ extern void test_no_sm4 (void) __attribute__((__target__("no-sm extern void test_no_user_msr (void) __attribute__((__target__("no-usermsr"))); extern void test_no_avx10_2 (void) __attribute__((__target__("no-avx10.2"))); extern void test_no_avx10_2_512 (void) __attribute__((__target__("no-avx10.2-512"))); +extern void test_no_amx_avx512 (void) __attribute__((__target__("no-amx-avx512"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index fbc39c5..3349ce0 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index b32a5d75..9725cfe 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 4662c86..13e636c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 229e2f7..7c43c06 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f0e2054..76e0d8d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1082,6 +1082,6 @@ #define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #include <x86intrin.h> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 5638e45..3b18269 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10775,6 +10775,17 @@ proc check_effective_target_avx10_2_512 { } { } "-mavx10.2-512" ] } +# Return 1 if amx-avx512 instructions can be compiled. +proc check_effective_target_amx_avx512 { } { + return [check_no_compiler_messages amx_avx512 object { + void + foo () + { + __asm__ volatile ("tilemovrow\t%%edx, %%tmm2, %%zmm1" ::); + } + } "-mamx-avx512" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { |