aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2019-07-25 16:49:36 +0800
committerH.J. Lu <hjl.tools@gmail.com>2020-09-28 05:30:30 -0700
commit5c609842d13a4c9c6be1a10f7980a74d27daeb85 (patch)
tree07c87896cb1402fbdd84d01ca81e5f0404783824
parent92f0d3d03a78a8aabe62e4c1e1b300b01516732f (diff)
downloadgcc-5c609842d13a4c9c6be1a10f7980a74d27daeb85.zip
gcc-5c609842d13a4c9c6be1a10f7980a74d27daeb85.tar.gz
gcc-5c609842d13a4c9c6be1a10f7980a74d27daeb85.tar.bz2
Enable GCC support for AMX-TILE,AMX-INT8,AMX-BF16.
AMX-TILE:ldtilecfg/sttilecfg/tileloadd/tileloaddt1/tilezero/tilerelease AMX-INT8:tdpbssd/tdpbsud/tdpbusd/tdpbuud AMX-BF16:tdpbf16ps gcc/ChangeLog * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AMX_TILE_SET, OPTION_MASK_ISA2_AMX_INT8_SET, OPTION_MASK_ISA2_AMX_BF16_SET, OPTION_MASK_ISA2_AMX_TILE_UNSET, OPTION_MASK_ISA2_AMX_INT8_UNSET, OPTION_MASK_ISA2_AMX_BF16_UNSET, OPTION_MASK_ISA2_XSAVE_UNSET): New marcos. (ix86_handle_option): Hanlde -mamx-tile, -mamx-int8, -mamx-bf16. * common/config/i386/i386-cpuinfo.h (processor_types): Add FEATURE_AMX_TILE, FEATURE_AMX_INT8, FEATURE_AMX_BF16. * common/config/i386/cpuinfo.h (XSTATE_TILECFG, XSTATE_TILEDATA, XCR_AMX_ENABLED_MASK): New macro. (get_available_features): Enable AMX features only if their states are suoorited by OSXSAVE. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for amx-tile, amx-int8, amx-bf16. * config.gcc: Add amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h to extra headers. * config/i386/amxbf16intrin.h: New file. * config/i386/amxint8intrin.h: Ditto. * config/i386/amxtileintrin.h: Ditto. * config/i386/cpuid.h (bit_AMX_BF16, bit_AMX_TILE, bit_AMX_INT8): New macro. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AMX_TILE__, __AMX_INT8__, AMX_BF16__. * config/i386/i386-options.c (ix86_target_string): Add -mamx-tile, -mamx-int8, -mamx-bf16. (ix86_option_override_internal): Handle AMX-TILE, AMX-INT8, AMX-BF16. * config/i386/i386.h (TARGET_AMX_TILE, TARGET_AMX_TILE_P, TARGET_AMX_INT8, TARGET_AMX_INT8_P, TARGET_AMX_BF16_P, PTA_AMX_TILE, PTA_AMX_INT8, PTA_AMX_BF16): New macros. * config/i386/i386.opt: Add -mamx-tile, -mamx-int8, -mamx-bf16. * config/i386/immintrin.h: Include amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h. * doc/invoke.texi: Document -mamx-tile, -mamx-int8, -mamx-bf16. * doc/extend.texi: Document amx-tile, amx-int8, amx-bf16. * doc/sourcebuild.texi ((Effective-Target Keywords, Other hardware attributes): Document amx_int8, amx_tile, amx_bf16. gcc/testsuite/ChangeLog * lib/target-supports.exp (check_effective_target_amx_tile, check_effective_target_amx_int8, check_effective_target_amx_bf16): New proc. * g++.dg/other/i386-2.C: Add -mamx-tile, -mamx-int8, -mamx-bf16. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/amx-check.h: New header file. * gcc.target/i386/amxbf16-asmatt-1.c: New test. * gcc.target/i386/amxint8-asmatt-1.c: New test. * gcc.target/i386/amxtile-asmatt-1.c: Ditto. * gcc.target/i386/amxbf16-asmintel-1.c: Ditto. * gcc.target/i386/amxint8-asmintel-1.c: Ditto. * gcc.target/i386/amxtile-asmintel-1.c: Ditto. * gcc.target/i386/amxbf16-dpbf16ps-2.c: Ditto. * gcc.target/i386/amxint8-dpbssd-2.c: Ditto. * gcc.target/i386/amxint8-dpbsud-2.c: Ditto. * gcc.target/i386/amxint8-dpbusd-2.c: Ditto. * gcc.target/i386/amxint8-dpbuud-2.c: Ditto. * gcc.target/i386/amxtile-2.c: Ditto.
-rw-r--r--gcc/common/config/i386/cpuinfo.h16
-rw-r--r--gcc/common/config/i386/i386-common.c50
-rw-r--r--gcc/common/config/i386/i386-cpuinfo.h3
-rw-r--r--gcc/common/config/i386/i386-isas.h3
-rw-r--r--gcc/config.gcc6
-rw-r--r--gcc/config/i386/amxbf16intrin.h29
-rw-r--r--gcc/config/i386/amxint8intrin.h38
-rw-r--r--gcc/config/i386/amxtileintrin.h75
-rw-r--r--gcc/config/i386/cpuid.h3
-rw-r--r--gcc/config/i386/i386-c.c7
-rw-r--r--gcc/config/i386/i386-options.c20
-rw-r--r--gcc/config/i386/i386.h12
-rw-r--r--gcc/config/i386/i386.opt14
-rw-r--r--gcc/config/i386/immintrin.h6
-rw-r--r--gcc/doc/extend.texi15
-rw-r--r--gcc/doc/invoke.texi10
-rw-r--r--gcc/doc/sourcebuild.texi9
-rw-r--r--gcc/testsuite/g++.dg/other/i386-2.C3
-rw-r--r--gcc/testsuite/g++.dg/other/i386-3.C3
-rw-r--r--gcc/testsuite/gcc.target/i386/amx-check.h185
-rw-r--r--gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c83
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c61
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c61
-rw-r--r--gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c61
-rw-r--r--gcc/testsuite/gcc.target/i386/amxtile-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/funcspec-56.inc6
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c5
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c3
-rw-r--r--gcc/testsuite/lib/target-supports.exp33
39 files changed, 1032 insertions, 13 deletions
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 12237e2..c96455c 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -509,15 +509,20 @@ get_available_features (struct __processor_model *cpu_model,
#define XSTATE_OPMASK 0x20
#define XSTATE_ZMM 0x40
#define XSTATE_HI_ZMM 0x80
+#define XSTATE_TILECFG 0x20000
+#define XSTATE_TILEDATA 0x40000
#define XCR_AVX_ENABLED_MASK \
(XSTATE_SSE | XSTATE_YMM)
#define XCR_AVX512F_ENABLED_MASK \
(XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
+#define XCR_AMX_ENABLED_MASK \
+ (XSTATE_TILECFG | XSTATE_TILEDATA)
/* Check if AVX and AVX512 are usable. */
int avx_usable = 0;
int avx512_usable = 0;
+ int amx_usable = 0;
if ((ecx & bit_OSXSAVE))
{
/* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
@@ -533,6 +538,8 @@ get_available_features (struct __processor_model *cpu_model,
avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK)
== XCR_AVX512F_ENABLED_MASK);
}
+ amx_usable = ((xcrlow & XCR_AMX_ENABLED_MASK)
+ == XCR_AMX_ENABLED_MASK);
}
#define set_feature(f) \
@@ -651,6 +658,15 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_PCONFIG);
if (edx & bit_IBT)
set_feature (FEATURE_IBT);
+ if (amx_usable)
+ {
+ if (edx & bit_AMX_TILE)
+ set_feature (FEATURE_AMX_TILE);
+ if (edx & bit_AMX_INT8)
+ set_feature (FEATURE_AMX_INT8);
+ if (edx & bit_AMX_BF16)
+ set_feature (FEATURE_AMX_BF16);
+ }
if (avx512_usable)
{
if (ebx & bit_AVX512F)
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 6e34095..1014214 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -101,6 +101,9 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_XSAVE_SET)
#define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET OPTION_MASK_ISA2_AVX512VP2INTERSECT
+#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
+#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -194,6 +197,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \
| OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \
| OPTION_MASK_ISA_AVX_UNSET)
+#define OPTION_MASK_ISA2_XSAVE_UNSET OPTION_MASK_ISA2_AMX_TILE_UNSET
#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
#define OPTION_MASK_ISA_AVX2_UNSET \
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
@@ -247,6 +251,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
+#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
+#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
+#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -931,6 +938,47 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mamx_tile:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_TILE_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_UNSET;
+ }
+ return true;
+
+ case OPT_mamx_int8:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_INT8_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_UNSET;
+ }
+ return true;
+
+ case OPT_mamx_bf16:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_BF16_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_BF16_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_BF16_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -1265,6 +1313,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XSAVE_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_XSAVE_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_XSAVE_UNSET;
}
return true;
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index 84ca97e..5b94b1f 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -216,6 +216,9 @@ enum processor_features
FEATURE_XSAVEC,
FEATURE_XSAVEOPT,
FEATURE_XSAVES,
+ FEATURE_AMX_TILE,
+ FEATURE_AMX_INT8,
+ FEATURE_AMX_BF16,
CPU_FEATURE_MAX
};
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
index 08c9dbe..3c830ea 100644
--- a/gcc/common/config/i386/i386-isas.h
+++ b/gcc/common/config/i386/i386-isas.h
@@ -160,4 +160,7 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("xsaveopt", FEATURE_XSAVEOPT, P_NONE,
"-mxsaveopt")
ISA_NAMES_TABLE_ENTRY("xsaves", FEATURE_XSAVES, P_NONE, "-mxsaves")
+ ISA_NAMES_TABLE_ENTRY("amx-tile", FEATURE_AMX_TILE, P_NONE, "-mamx-tile")
+ ISA_NAMES_TABLE_ENTRY("amx-int8", FEATURE_AMX_INT8, P_NONE, "-mamx-int8")
+ ISA_NAMES_TABLE_ENTRY("amx-bf16", FEATURE_AMX_BF16, P_NONE, "-mamx-bf16")
ISA_NAMES_TABLE_END
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 845f10e..2d0cfde 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -412,7 +412,8 @@ i[34567]86-*-*)
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
- tsxldtrkintrin.h"
+ tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
+ amxbf16intrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -447,7 +448,8 @@ x86_64-*-*)
waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
- tsxldtrkintrin.h"
+ tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
+ amxbf16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h
new file mode 100644
index 0000000..b162096
--- /dev/null
+++ b/gcc/config/i386/amxbf16intrin.h
@@ -0,0 +1,29 @@
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXBF16INTRIN_H_INCLUDED
+#define _AMXBF16INTRIN_H_INCLUDED
+
+#if !defined(__AMX_BF16__)
+#pragma GCC push_options
+#pragma GCC target("amx-bf16")
+#define __DISABLE_AMX_BF16__
+#endif /* __AMX_BF16__ */
+
+#if defined(__x86_64__) && defined(__AMX_BF16__)
+#define _tile_dpbf16ps_internal(dst,src1,src2) \
+ __asm__ volatile\
+ ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbf16ps(dst,src1,src2) \
+ _tile_dpbf16ps_internal (dst, src1, src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_BF16__
+#undef __DISABLE_AMX_BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_BF16__ */
+
+#endif /* _AMXBF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h
new file mode 100644
index 0000000..11adc1f
--- /dev/null
+++ b/gcc/config/i386/amxint8intrin.h
@@ -0,0 +1,38 @@
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXINT8INTRIN_H_INCLUDED
+#define _AMXINT8INTRIN_H_INCLUDED
+
+#if !defined(__AMX_INT8__)
+#pragma GCC push_options
+#pragma GCC target("amx-int8")
+#define __DISABLE_AMX_INT8__
+#endif /* __AMX_INT8__ */
+
+#if defined(__x86_64__) && defined(__AMX_INT8__)
+#define _tile_int8_dp_internal(name,dst,src1,src2) \
+ __asm__ volatile \
+ ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbssd(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
+
+#define _tile_dpbsud(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
+
+#define _tile_dpbusd(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
+
+#define _tile_dpbuud(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_INT8__
+#undef __DISABLE_AMX_INT8__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_INT8__ */
+
+#endif /* _AMXINT8INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
new file mode 100644
index 0000000..e78e5c0
--- /dev/null
+++ b/gcc/config/i386/amxtileintrin.h
@@ -0,0 +1,75 @@
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXTILEINTRIN_H_INCLUDED
+#define _AMXTILEINTRIN_H_INCLUDED
+
+#if !defined(__AMX_TILE__)
+#pragma GCC push_options
+#pragma GCC target("amx-tile")
+#define __DISABLE_AMX_TILE__
+#endif /* __AMX_TILE__ */
+
+#if defined(__x86_64__) && defined(__AMX_TILE__)
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_loadconfig (const void *__config)
+{
+ __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_storeconfig (void *__config)
+{
+ __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_release (void)
+{
+ __asm__ volatile ("tilerelease" ::);
+}
+
+#define _tile_loadd(dst,base,stride) \
+ _tile_loadd_internal (dst, base, stride)
+
+#define _tile_loadd_internal(dst,base,stride) \
+ __asm__ volatile \
+ ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*) base), "r" ((long) stride))
+
+#define _tile_stream_loadd(dst,base,stride) \
+ _tile_stream_loadd_internal (dst, base, stride)
+
+#define _tile_stream_loadd_internal(dst,base,stride) \
+ __asm__ volatile \
+ ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*) base), "r" ((long) stride))
+
+#define _tile_stored(dst,base,stride) \
+ _tile_stored_internal (dst, base, stride)
+
+#define _tile_stored_internal(src,base,stride) \
+ __asm__ volatile \
+ ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \
+ :: "r" ((void*) base), "r" ((long) stride) \
+ : "memory")
+
+#define _tile_zero(dst) \
+ _tile_zero_internal (dst)
+
+#define _tile_zero_internal(dst) \
+ __asm__ volatile \
+ ("tilezero\t%%tmm"#dst ::)
+
+#endif
+
+#ifdef __DISABLE_AMX_TILE__
+#undef __DISABLE_AMX_TILE__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_TILE__ */
+
+#endif /* _AMXTILEINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index bca61d62..4598434 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -127,6 +127,9 @@
#define bit_PCONFIG (1 << 18)
#define bit_SERIALIZE (1 << 14)
#define bit_TSXLDTRK (1 << 16)
+#define bit_AMX_BF16 (1 << 22)
+#define bit_AMX_TILE (1 << 24)
+#define bit_AMX_INT8 (1 << 25)
/* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */
#define bit_BNDREGS (1 << 3)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 3553a37..9da682a 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -588,6 +588,13 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__ENQCMD__");
if (isa_flag2 & OPTION_MASK_ISA2_TSXLDTRK)
def_or_undef (parse_in, "__TSXLDTRK__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_TILE)
+ def_or_undef (parse_in, "__AMX_TILE__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_INT8)
+ def_or_undef (parse_in, "__AMX_INT8__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16)
+ def_or_undef (parse_in, "__AMX_BF16__");
+
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index 2fabd20..597de53 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -209,7 +209,10 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavx512bf16", OPTION_MASK_ISA2_AVX512BF16 },
{ "-menqcmd", OPTION_MASK_ISA2_ENQCMD },
{ "-mserialize", OPTION_MASK_ISA2_SERIALIZE },
- { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK }
+ { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK },
+ { "-mamx-tile", OPTION_MASK_ISA2_AMX_TILE },
+ { "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 },
+ { "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1033,6 +1036,9 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("enqcmd", OPT_menqcmd),
IX86_ATTR_ISA ("serialize", OPT_mserialize),
IX86_ATTR_ISA ("tsxldtrk", OPT_mtsxldtrk),
+ IX86_ATTR_ISA ("amx-tile", OPT_mamx_tile),
+ IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8),
+ IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -2258,6 +2264,18 @@ ix86_option_override_internal (bool main_args_p,
&& !(opts->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA2_AVX512BF16))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512BF16;
+ if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_TILE))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE;
+ if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_INT8))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8;
+ if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_BF16))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16;
if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 92b7475..a449653 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -203,6 +203,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_SERIALIZE_P(x) TARGET_ISA2_SERIALIZE_P(x)
#define TARGET_TSXLDTRK TARGET_ISA2_TSXLDTRK
#define TARGET_TSXLDTRK_P(x) TARGET_ISA2_TSXLDTRK_P(x)
+#define TARGET_AMX_TILE TARGET_ISA2_AMX_TILE
+#define TARGET_AMX_TILE_P(x) TARGET_ISA2_AMX_TILE(x)
+#define TARGET_AMX_INT8 TARGET_ISA2_AMX_INT8
+#define TARGET_AMX_INT8_P(x) TARGET_ISA2_AMX_INT8(x)
+#define TARGET_AMX_BF16 TARGET_ISA2_AMX_BF16
+#define TARGET_AMX_BF16_P(x) TARGET_ISA2_AMX_BF16(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@@ -2466,6 +2472,9 @@ const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15);
const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16);
const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17);
const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18);
+const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19);
+const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20);
+const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21);
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -2499,7 +2508,8 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT;
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
- | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK;
+ | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
+ | PTA_AMX_INT8 | PTA_AMX_BF16;
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
| PTA_WAITPKG | PTA_SERIALIZE;
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index c9f7195..9389dc2 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1114,4 +1114,16 @@ Support SERIALIZE built-in functions and code generation.
mtsxldtrk
Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
-Support TSXLDTRK built-in functions and code generation. \ No newline at end of file
+Support TSXLDTRK built-in functions and code generation.
+
+mamx-tile
+Target Report Mask(ISA2_AMX_TILE) Var(ix86_isa_flags2) Save
+Support AMX-TILE built-in functions and code generation.
+
+mamx-int8
+Target Report Mask(ISA2_AMX_INT8) Var(ix86_isa_flags2) Save
+Support AMX-INT8 built-in functions and code generation.
+
+mamx-bf16
+Target Report Mask(ISA2_AMX_BF16) Var(ix86_isa_flags2) Save
+Support AMX-BF16 built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index b660d0d..6d25f44 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -144,6 +144,12 @@
#include <tsxldtrkintrin.h>
+#include <amxtileintrin.h>
+
+#include <amxint8intrin.h>
+
+#include <amxbf16intrin.h>
+
#include <rdseedintrin.h>
#include <prfchwintrin.h>
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7f14a28..c9f7299 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -6623,6 +6623,21 @@ Enable/disable the generation of the XSAVEOPT instructions.
@cindex @code{target("xsaves")} function attribute, x86
Enable/disable the generation of the XSAVES instructions.
+@item amx-tile
+@itemx no-amx-tile
+@cindex @code{target("amx-tile")} function attribute, x86
+Enable/disable the generation of the AMX-TILE instructions.
+
+@item amx-int8
+@itemx no-amx-int8
+@cindex @code{target("amx-int8")} function attribute, x86
+Enable/disable the generation of the AMX-INT8 instructions.
+
+@item amx-bf16
+@itemx no-amx-bf16
+@cindex @code{target("amx-bf16")} function attribute, x86
+Enable/disable the generation of the AMX-BF16 instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 226b0e1..3e087b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1362,6 +1362,7 @@ See RS/6000 and PowerPC Options.
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
+-mamx-tile -mamx-int8 -mamx-bf16@gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
@@ -30205,6 +30206,15 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mserialize
@opindex mserialize
+@need 200
+@itemx -mamx-tile
+@opindex mamx-tile
+@need 200
+@itemx -mamx-int8
+@opindex mamx-int8
+@need 200
+@itemx -mamx-bf16
+@opindex mamx-bf16
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 65b2e55..b625f1e 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2249,6 +2249,15 @@ Target supports the execution of @code{avx512f} instructions.
@item avx512vp2intersect
Target supports the execution of @code{avx512vp2intersect} instructions.
+@item amx_tile
+Target supports the execution of @code{amx-tile} instructions.
+
+@item amx_int8
+Target supports the execution of @code{amx-int8} instructions.
+
+@item amx_bf16
+Target supports the execution of @code{amx-bf16} instructions.
+
@item cell_hw
Test system can execute AltiVec and Cell PPU instructions.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 04d5fec..449f30d 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,11 +1,12 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h,
+ amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
avx512vp2intersectvlintrin.h and mm_malloc.h.h are usable
with -O -pedantic-errors. */
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index f40172e..29e9891 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,11 +1,12 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h,
+ amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
avx512vp2intersectvlintrin.h and mm_malloc.h are usable
with -O -fkeep-inline-functions. */
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
new file mode 100644
index 0000000..03616ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
@@ -0,0 +1,185 @@
+#ifndef AMX_CHECK_H_INCLUDED
+#define AMX_CHECK_H_INCLUDED
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+#include "cpuid.h"
+
+/* TODO: The tmm emulation is temporary for current
+ AMX implementation with no tmm regclass, should
+ be changed in the future. */
+typedef struct __tile_config
+{
+ uint8_t palette_id;
+ uint8_t start_row;
+ uint8_t reserved_0[14];
+ uint16_t colsb[8]; /* Colum size of each tmm register in bytes */
+ uint16_t reserved_1[8];
+ uint8_t rows[8]; /* Row size of each tmm reg in bytes */
+ uint8_t reserved_2[8];
+} __tilecfg;
+
+typedef union __union_tile_config
+{
+ __tilecfg s;
+ uint8_t a[64];
+} __tilecfg_u;
+
+typedef struct __tile
+{
+ /* Max size of tile register */
+ uint8_t buf[1024];
+ int rows;
+ int colsb;
+} __tile;
+
+/* Maxium col/row size in bytes */
+#define MAX_ROWS 16
+#define MAX_COLS 64
+
+/* Stride (colum width in byte) used for tileload/store */
+#define _STRIDE 64
+
+/* Initialize tile config by setting all tmm size to 16x64 */
+void init_tile_config (__tilecfg_u *dst)
+{
+ int i;
+
+ dst->s.palette_id = 1;
+ dst->s.start_row = 0;
+
+ for (i = 0; i < 14; i++)
+ dst->s.reserved_0[i] = 0;
+
+ for (i = 0; i < 8; i++)
+ {
+ dst->s.colsb[i] = _STRIDE;
+ dst->s.rows[i] = 16;
+ dst->s.reserved_1[i] = 0;
+ dst->s.reserved_2[i] = 0;
+ }
+
+ _tile_loadconfig (dst->a);
+}
+
+/* Init __tile variable that going to be store to register
+ w/o extra buffer. If buffer exists, it should be the same
+ size matrix as corresponding tmm register.
+ Should execute init_tile_config first */
+void init_tile_src (const int tmm_num, __tile *src, uint8_t *buffer)
+{
+ int rows, colsb, i, j;
+ __tilecfg_u tmp;
+
+ _tile_storeconfig (tmp.a);
+
+ src->rows = rows = tmp.s.rows[tmm_num];
+ src->colsb = colsb = tmp.s.colsb[tmm_num];
+
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < colsb; j++)
+ {
+ if(buffer)
+ src->buf[i * colsb + j] = buffer[i * colsb + j];
+ else
+ src->buf[i * colsb + j] = (i + 11 * j) % 256;
+ }
+
+}
+
+/* Init __tile src and corresponding tmm register */
+#define init_tile_reg_and_src(tmm_num, src) \
+{ \
+ init_tile_src (tmm_num, &src, NULL); \
+ _tile_loadd (tmm_num, src.buf, _STRIDE); \
+}
+
+#define init_tile_reg_and_src_with_buffer(tmm_num, src, buffer) \
+{ \
+ init_tile_src (tmm_num, &src, buffer); \
+ _tile_loadd (tmm_num, src.buf, _STRIDE); \
+}
+
+/* Zero __tile src. It should be init first. */
+void zero_tile_src (__tile *src)
+{
+ int i, j;
+
+ for (i = 0; i < src->rows; i++)
+ for (j = 0; j < src->colsb; j++)
+ src->buf[i * src->colsb + j] = 0;
+}
+
+/* Compare tile config value with __tilecfg_u dst */
+int check_tile_config (__tilecfg_u *src, __tilecfg_u *dst)
+{
+ size_t size = sizeof(__tilecfg);
+ uint8_t *pa_src = (uint8_t *) src->a;
+ uint8_t *pa_dst = (uint8_t *) dst->a;
+
+ for (int i = 0; i < size; i++)
+ if (pa_src[i] != pa_dst[i])
+ return 0;
+
+ return 1;
+}
+
+/* Compare tile register value with __tile variable */
+int check_tile_register (__tile* ref, __tile* target)
+{
+ /* Tile register should be stored from tmm to
+ memory and compare with emulation results. */
+ int rows = target->rows;
+ int colsb = target->colsb;
+ int i, j;
+
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < colsb; j++)
+ if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
+ return 0;
+
+ return 1;
+}
+
+#ifndef DO_TEST
+#define DO_TEST do_test
+static void test_amx (void);
+__attribute__ ((noinline))
+static void
+do_test (void)
+{
+ test_amx ();
+}
+#endif
+
+int
+main ()
+{
+ /* Check cpu support for AMX */
+ if (__builtin_cpu_supports ("amx-tile")
+#ifdef AMX_INT8
+ && __builtin_cpu_supports ("amx-int8")
+#endif
+#ifdef AMX_BF16
+ && __builtin_cpu_supports ("amx-bf16")
+#endif
+ )
+ {
+ DO_TEST ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
+
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c
new file mode 100644
index 0000000..a5e5bdd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-bf16" } */
+/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
+#include <immintrin.h>
+
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+void TEST ()
+{
+ _tile_dpbf16ps (TMM1, TMM2, TMM3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c
new file mode 100644
index 0000000..c2d6074
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-bf16 -masm=intel" } */
+/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
+#include <immintrin.h>
+
+void TEST ()
+{
+ _tile_dpbf16ps (1, 2, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
new file mode 100644
index 0000000..c819113
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
@@ -0,0 +1,83 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -mamx-bf16" } */
+#include <immintrin.h>
+
+#define AMX_BF16
+#define DO_TEST test_amx_bf16_dpbf16ps
+void test_amx_bf16_dpbf16ps ();
+#include "amx-check.h"
+
+/* Transformation functions between bf16/float */
+static uint16_t make_bf16 (float f)
+{
+ uint32_t u = (uint32_t)f;
+ u = (u >> 16) & 0xffff;
+ return (uint16_t)u;
+}
+
+static float make_f32 (uint16_t bf)
+{
+ uint32_t u = (uint32_t)(bf << 16);
+ return (float)u;
+}
+
+/* Init tile buffer with bf16 pairs */
+void init_bf16_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ uint16_t *ptr = (uint16_t *)buf;
+
+ for(i = 0; i < 16; i++)
+ for(j = 0; j < 32; j++)
+ {
+ float f = 16.1f * i + 3.4f * j;
+ ptr[i * 32 + j] = make_bf16(f);
+ }
+}
+
+void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, __tile *src2)
+{
+ uint16_t *src1_buf = (uint16_t *)src1->buf;
+ uint16_t *src2_buf = (uint16_t *)src2->buf;
+ float *dst_buf = (float *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 2; t+=2)
+ {
+ dst_buf[i * N + k] +=
+ (make_f32(src1_buf[i * 4 * N + 4 * j + t]) *
+ make_f32(src2_buf[j * 4 * K + 4 * k + t])) +
+ (make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) *
+ make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]));
+ }
+
+}
+
+void test_amx_bf16_dpbf16ps ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024];
+
+ init_bf16_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
+ init_tile_reg_and_src_with_buffer (2, dst, tmp_dst_buf);
+ init_tile_reg_and_src_with_buffer (3, dst, tmp_dst_buf);
+
+ calc_matrix_dpbf16ps (&dst, &src1, &src2);
+
+ _tile_dpbf16ps (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_tile_register (&dst_ref, &dst))
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c
new file mode 100644
index 0000000..1842c23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-int8" } */
+/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
+/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } *
+/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
+/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
+#include <immintrin.h>
+
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+void TEST ()
+{
+ _tile_dpbssd (TMM1, TMM2, TMM3);
+ _tile_dpbsud (TMM1, TMM2, TMM3);
+ _tile_dpbusd (TMM1, TMM2, TMM3);
+ _tile_dpbuud (TMM1, TMM2, TMM3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c
new file mode 100644
index 0000000..bcfbb3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-int8 -masm=intel" } */
+/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
+/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } *
+/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
+/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
+#include <immintrin.h>
+
+void TEST ()
+{
+ _tile_dpbssd (1, 2, 3);
+ _tile_dpbsud (1, 2, 3);
+ _tile_dpbusd (1, 2, 3);
+ _tile_dpbuud (1, 2, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c
new file mode 100644
index 0000000..62d31ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
+#include <immintrin.h>
+
+#define AMX_INT8
+#define DO_TEST test_amx_int8_dpbssd
+void test_amx_int8_dpbssd ();
+#include "amx-check.h"
+
+/* Init tile buffer with int32 value*/
+void init_i32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ int *ptr = (int *)buf;
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2 * i - (16 - j);
+}
+
+void calc_matrix_dpbssd (__tile *dst, __tile *src1, __tile *src2)
+{
+ int8_t *src1_buf = (int8_t *)src1->buf;
+ int8_t *src2_buf = (int8_t *)src2->buf;
+ int *dst_buf = (int *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 4; t++)
+ {
+ dst_buf[i * N + k] +=
+ ((int) src1_buf[i * 4 * N + 4 * j + t]) *
+ ((int) src2_buf[j * 4 * K + 4 * k + t]);
+ }
+}
+
+void test_amx_int8_dpbssd ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024];
+
+ init_i32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
+ init_tile_reg_and_src (2, src1);
+ init_tile_reg_and_src (3, src2);
+
+ calc_matrix_dpbssd (&dst, &src1, &src2);
+
+ _tile_dpbssd (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_tile_register (&dst_ref, &dst))
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c
new file mode 100644
index 0000000..5007ee9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
+#include <immintrin.h>
+
+#define AMX_INT8
+#define DO_TEST test_amx_int8_dpbsud
+void test_amx_int8_dpbsud ();
+#include "amx-check.h"
+
+/* Init tile buffer with int32 value*/
+void init_i32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ int *ptr = (int *)buf;
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2 * i - (16 - j);
+}
+
+void calc_matrix_dpbsud (__tile *dst, __tile *src1, __tile *src2)
+{
+ int8_t *src1_buf = (int8_t *)src1->buf;
+ uint8_t *src2_buf = (uint8_t *)src2->buf;
+ int *dst_buf = (int *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 4; t++)
+ {
+ dst_buf[i * N + k] +=
+ ((int) src1_buf[i * 4 * N + 4 * j + t]) *
+ ((unsigned) src2_buf[j * 4 * K + 4 * k + t]);
+ }
+}
+
+void test_amx_int8_dpbsud ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024];
+
+ init_i32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
+ init_tile_reg_and_src (2, src1);
+ init_tile_reg_and_src (3, src2);
+
+ calc_matrix_dpbsud (&dst, &src1, &src2);
+ _tile_dpbsud (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_tile_register (&dst_ref, &dst))
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c
new file mode 100644
index 0000000..17888e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
+#include <immintrin.h>
+
+#define AMX_INT8
+#define DO_TEST test_amx_int8_dpbusd
+void test_amx_int8_dpbusd ();
+#include "amx-check.h"
+
+/* Init tile buffer with int32 value*/
+void init_i32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ int *ptr = (int *)buf;
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2 * i - (16 - j);
+}
+
+void calc_matrix_dpbusd (__tile *dst, __tile *src1, __tile *src2)
+{
+ uint8_t *src1_buf = (uint8_t *)src1->buf;
+ int8_t *src2_buf = (int8_t *)src2->buf;
+ int *dst_buf = (int *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 4; t++)
+ {
+ dst_buf[i * N + k] +=
+ ((unsigned) src1_buf[i * 4 * N + 4 * j + t]) *
+ ((int) src2_buf[j * 4 * K + 4 * k + t]);
+ }
+}
+
+void test_amx_int8_dpbusd ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024];
+
+ init_i32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
+ init_tile_reg_and_src (2, src1);
+ init_tile_reg_and_src (3, src2);
+
+ calc_matrix_dpbusd (&dst, &src1, &src2);
+ _tile_dpbusd (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_tile_register (&dst_ref, &dst))
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c
new file mode 100644
index 0000000..c39666c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -mamx-int8" } */
+#include <immintrin.h>
+
+#define AMX_INT8
+#define DO_TEST test_amx_int8_dpbuud
+void test_amx_int8_dpbuud ();
+#include "amx-check.h"
+
+/* Init tile buffer with int32 value*/
+void init_i32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ int *ptr = (int *)buf;
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2 * i - (16 - j);
+}
+
+void calc_matrix_dpbuud (__tile *dst, __tile *src1, __tile *src2)
+{
+ uint8_t *src1_buf = (uint8_t *)src1->buf;
+ uint8_t *src2_buf = (uint8_t *)src2->buf;
+ int *dst_buf = (int *)dst->buf;
+
+ int M = src1->rows;
+ int N = src1->colsb / 4;
+ int K = src2->colsb / 4;
+ int i, j, k, t;
+
+ for (i = 0; i < M; i++)
+ for (j = 0; j < N; j++)
+ for (k = 0; k < K; k++)
+ for (t = 0; t < 4; t++)
+ {
+ dst_buf[i * N + k] +=
+ ((unsigned) src1_buf[i * 4 * N + 4 * j + t]) *
+ ((unsigned) src2_buf[j * 4 * K + 4 * k + t]);
+ }
+}
+
+void test_amx_int8_dpbuud ()
+{
+ __tilecfg_u cfg;
+ __tile dst, dst_ref, src1, src2;
+ uint8_t tmp_dst_buf[1024];
+
+ init_i32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
+ init_tile_reg_and_src (2, src1);
+ init_tile_reg_and_src (3, src2);
+
+ calc_matrix_dpbuud (&dst, &src1, &src2);
+ _tile_dpbuud (1, 2, 3);
+ _tile_stored (1, dst_ref.buf, _STRIDE);
+
+ if (!check_tile_register (&dst_ref, &dst))
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxtile-2.c b/gcc/testsuite/gcc.target/i386/amxtile-2.c
new file mode 100644
index 0000000..cef84f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxtile-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile " } */
+#include <immintrin.h>
+
+#define DO_TEST test_amx_tile
+void test_amx_tile ();
+#include "amx-check.h"
+
+void test_amx_tile ()
+{
+ __tilecfg_u cfg_src, cfg_dst;
+ __tile reg_src1, reg_src2, reg_ref;
+
+ /* check tile config load & store. */
+ init_tile_config (&cfg_src);
+ _tile_storeconfig (cfg_dst.a);
+
+ if (!check_tile_config (&cfg_src, &cfg_dst))
+ abort ();
+
+ /* check tile register load & store. */
+ init_tile_reg_and_src (1, reg_src1);
+ _tile_stored (1, reg_ref.buf, _STRIDE);
+ if (!check_tile_register (&reg_ref, &reg_src1))
+ abort ();
+
+ /* check tile stream load instruction */
+ init_tile_src (2, &reg_src2, NULL);
+ _tile_stream_loadd (2, reg_src2.buf, _STRIDE);
+ _tile_stored (2, reg_ref.buf, _STRIDE);
+ if (!check_tile_register (&reg_ref, &reg_src2))
+ abort ();
+
+ /* check tile register zeroing */
+ zero_tile_src (&reg_src2);
+ _tile_zero (2);
+ _tile_stored (2, reg_ref.buf, _STRIDE);
+ if (!check_tile_register (&reg_ref, &reg_src2))
+ abort ();
+
+ /* check tile cfg zeroing */
+ memset (cfg_dst.a, 0, sizeof(__tilecfg));
+ _tile_release ();
+ _tile_storeconfig (cfg_src.a);
+ if (!check_tile_config (&cfg_src, &cfg_dst))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c
new file mode 100644
index 0000000..ceb5fa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile " } */
+/* { dg-final { scan-assembler "ldtilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */
+/* { dg-final { scan-assembler "sttilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */
+/* { dg-final { scan-assembler "tilerelease" } } */
+/* { dg-final { scan-assembler "tileloadd\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tileloaddt1\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tilestored\[ \\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)" } } */
+/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */
+#include <immintrin.h>
+
+extern int a[];
+extern const void* base;
+extern const int stride;
+
+#define TMM0 0
+#define TMM1 1
+#define TMM2 2
+#define TMM3 3
+
+void TEST ()
+{
+ _tile_loadconfig (a);
+ _tile_storeconfig (a);
+ _tile_release ();
+ _tile_loadd (TMM3, base, stride);
+ _tile_stream_loadd (TMM2, base, stride);
+ _tile_stored (TMM1, base, stride);
+ _tile_zero (TMM0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c
new file mode 100644
index 0000000..88ef612
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile -masm=intel " } */
+/* { dg-final { scan-assembler "ldtilecfg\[ \\t]" } } */
+/* { dg-final { scan-assembler "sttilecfg\[ \\t]" } } */
+/* { dg-final { scan-assembler "tilerelease" } } */
+/* { dg-final { scan-assembler "tileloadd\[ \\t]%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tileloaddt1\[ \\t]%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tilestored\[ \\t]\[^\n\]+\[^\n\]*%tmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */
+#include <immintrin.h>
+
+extern int a[];
+extern const void* base;
+extern const int stride;
+void TEST ()
+{
+ _tile_loadconfig (a);
+ _tile_storeconfig (a);
+ _tile_release ();
+ _tile_loadd (5, base, stride);
+ _tile_stream_loadd (4, base, stride);
+ _tile_stored (3, base, stride);
+ _tile_zero (2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 94ffbb6..8e669f1 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -71,6 +71,9 @@ extern void test_tsxldtrk (void) __attribute__((__target__("tsxldtrk")));
extern void test_enqcmd (void) __attribute__((__target__("enqcmd")));
extern void test_avx512bf16 (void) __attribute__((__target__("avx512bf16")));
extern void test_avx512vp2intersect (void) __attribute__((__target__("avx512vp2intersect")));
+extern void test_amx_tile (void) __attribute__((__target__("amx-tile")));
+extern void test_amx_int8 (void) __attribute__((__target__("amx-int8")));
+extern void test_amx_bf16 (void) __attribute__((__target__("amx-bf16")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@@ -143,6 +146,9 @@ extern void test_no_tsxldtrk (void) __attribute__((__target__("no-tsxldtrk")));
extern void test_no_enqcmd (void) __attribute__((__target__("no-enqcmd")));
extern void test_no_avx512bf16 (void) __attribute__((__target__("no-avx512bf16")));
extern void test_no_avx512vp2intersect (void) __attribute__((__target__("no-avx512vp2intersect")));
+extern void test_no_amx_tile (void) __attribute__((__target__("no-amx-tile")));
+extern void test_no_amx_int8 (void) __attribute__((__target__("no-amx-int8")));
+extern void test_no_amx_bf16 (void) __attribute__((__target__("no-amx-bf16")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index b1690d7..61146b2b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 3a640470..4d6c9b3 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index edaa2aa..837b51c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 7364b2f..fc75669f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -11,6 +11,7 @@
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxldtrkintrin.h,
avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
avx512bitalgintrin.h, avx512vp2intersectintrin.h,
+ amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper
builtin functions.
Defining away "extern" and "__inline" results in all of them being
@@ -102,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#endif
/* Following intrinsics require immediate arguments. They
@@ -219,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index eaadebe..9ca7c5d 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -10,6 +10,7 @@
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxtrkintrin.h,
avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
avx512bitalgintrin.h, avx512vp2intersectintrin.h,
+ amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper
builtin functions.
Defining away "extern" and "__inline" results in all of them being
@@ -697,6 +698,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#include <x86intrin.h>
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 0a00972..8314e44 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8956,6 +8956,39 @@ proc check_effective_target_avx512vaes { } {
} "-mvaes" ]
}
+# Return 1 if amx-tile instructions can be compiled.
+proc check_effective_target_amx_tile { } {
+ return [check_no_compiler_messages amx_tile object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tilerelease" ::);
+ }
+ } "-mamx-tile" ]
+}
+
+# Return 1 if amx-int8 instructions can be compiled.
+proc check_effective_target_amx_int8 { } {
+ return [check_no_compiler_messages amx_int8 object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tdpbssd\t%%tmm1, %%tmm2, %%tmm3" ::);
+ }
+ } "-mamx-int8" ]
+}
+
+# Return 1 if amx-bf16 instructions can be compiled.
+proc check_effective_target_amx_bf16 { } {
+ return [check_no_compiler_messages amx_bf16 object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tdpbf16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
+ }
+ } "-mamx-bf16" ]
+}
+
# Return 1 if vpclmulqdq instructions can be compiled.
proc check_effective_target_vpclmulqdq { } {
return [check_no_compiler_messages vpclmulqdq object {