aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:04:34 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:09:36 +0800
commit343f8113385d00e9ffac53150bca4f78be30e19c (patch)
treeb2838f3b54b157cc20dc0f8f349ef924b3fa1c9e
parent8ee5cd4b84489bee0f72153e96a9afe9493e170d (diff)
downloadgcc-343f8113385d00e9ffac53150bca4f78be30e19c.zip
gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.gz
gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.bz2
Support Intel AMX-AVX512
gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_available_features): Detect AMX-AVX512. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_AVX512_SET, OPTION_MASK_ISA2_AMX_AVX512_UNSET): New. (ix86_handle_option): Handle -mamx-avx512. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AMX_AVX512. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for amx-avx512. * config.gcc: Add amxavx512intrin.h * config/i386/cpuid.h (bit_AMX_AVX512): New. * config/i386/i386-c.cc (ix86_target_macros_internal): Handle amx-avx512. * config/i386/i386-isa.def (AMX_AVX512): Add DEF_PTA(AMX_AVX512). * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p): Handle amx-avx512. * config/i386/i386.opt: Add option -mamx-avx512. * config/i386/i386.opt.urls: Regenerated. * config/i386/immintrin.h: Include amxavx512intrin.h * doc/extend.texi: Document amx-avx512. * doc/invoke.texi: Document -mamx-avx512. * doc/sourcebuild.texi: Document target amx-avx512. * config/i386/amxavx512intrin.h: New file. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Add -mamx-avx512. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/amx-check.h: Add cpu check for AMX-AVX512. * gcc.target/i386/amx-helper.h: Support amx-avx512. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-12.c: Add -mamx-avx512. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Add amx-avx512. * gcc.target/i386/sse-23.c: Ditto. * lib/target-supports.exp (check_effective_target_amx_avx512): New. * gcc.target/i386/amxavx512-asmatt-1.c: New test. * gcc.target/i386/amxavx512-asmintel-1.c: Ditto. * gcc.target/i386/amxavx512-cvtrowd2ps-2.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2ph-2.c: Ditto. * gcc.target/i386/amxavx512-movrow-2.c: Ditto. Co-authored-by: Yu, Bing <bing1.yu@intel.com>
-rw-r--r--gcc/common/config/i386/cpuinfo.h11
-rw-r--r--gcc/common/config/i386/i386-common.cc24
-rw-r--r--gcc/common/config/i386/i386-cpuinfo.h1
-rw-r--r--gcc/common/config/i386/i386-isas.h2
-rw-r--r--gcc/config.gcc2
-rw-r--r--gcc/config/i386/amxavx512intrin.h189
-rw-r--r--gcc/config/i386/cpuid.h4
-rw-r--r--gcc/config/i386/i386-c.cc2
-rw-r--r--gcc/config/i386/i386-isa.def1
-rw-r--r--gcc/config/i386/i386-options.cc4
-rw-r--r--gcc/config/i386/i386.opt5
-rw-r--r--gcc/config/i386/i386.opt.urls3
-rw-r--r--gcc/config/i386/immintrin.h2
-rw-r--r--gcc/doc/extend.texi5
-rw-r--r--gcc/doc/invoke.texi11
-rw-r--r--gcc/doc/sourcebuild.texi3
-rw-r--r--gcc/testsuite/g++.dg/other/i386-2.C2
-rw-r--r--gcc/testsuite/g++.dg/other/i386-3.C2
-rw-r--r--gcc/testsuite/gcc.target/i386/amx-check.h3
-rw-r--r--gcc/testsuite/gcc.target/i386/amx-helper.h105
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/funcspec-56.inc2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c2
-rw-r--r--gcc/testsuite/lib/target-supports.exp11
33 files changed, 733 insertions, 19 deletions
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index e3eb6e9..67724c3 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -995,6 +995,17 @@ get_available_features (struct __processor_model *cpu_model,
}
}
+ /* Get Advanced Features at level 0x1e (eax = 0x1e, ecx = 1). */
+ if (max_cpuid_level >= 0x1e)
+ {
+ __cpuid_count (0x1e, 1, eax, ebx, ecx, edx);
+ if (amx_usable)
+ {
+ if (eax & bit_AMX_AVX512)
+ set_feature (FEATURE_AMX_AVX512);
+ }
+ }
+
/* Get Advanced Features at level 0x24 (eax = 0x24, ecx = 0). */
if (avx10_set && max_cpuid_level >= 0x24)
{
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 4a213f5..e8e3eb1 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -127,6 +127,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVX10_2_512_SET \
(OPTION_MASK_ISA2_AVX10_1_512_SET | OPTION_MASK_ISA2_AVX10_2_256_SET \
| OPTION_MASK_ISA2_AVX10_2_512)
+#define OPTION_MASK_ISA2_AMX_AVX512_SET \
+ (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AVX10_2_512_SET \
+ | OPTION_MASK_ISA2_AMX_AVX512)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -289,7 +292,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
(OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_INT8_UNSET \
| OPTION_MASK_ISA2_AMX_BF16_UNSET | OPTION_MASK_ISA2_AMX_FP16_UNSET \
- | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
+ | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET | OPTION_MASK_ISA2_AMX_AVX512_UNSET)
#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
@@ -317,7 +320,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVX10_1_512_UNSET \
(OPTION_MASK_ISA2_AVX10_1_512 | OPTION_MASK_ISA2_AVX10_2_512_UNSET)
#define OPTION_MASK_ISA2_AVX10_2_256_UNSET OPTION_MASK_ISA2_AVX10_2_256
-#define OPTION_MASK_ISA2_AVX10_2_512_UNSET OPTION_MASK_ISA2_AVX10_2_512
+#define OPTION_MASK_ISA2_AVX10_2_512_UNSET \
+ (OPTION_MASK_ISA2_AVX10_2_512 | OPTION_MASK_ISA2_AMX_AVX512_UNSET)
+#define OPTION_MASK_ISA2_AMX_AVX512_UNSET OPTION_MASK_ISA2_AMX_AVX512
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -1409,6 +1414,21 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mamx_avx512:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_AVX512_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_AVX512_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_AVX512_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_AVX512_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index b573166..cc5bb0d 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -269,6 +269,7 @@ enum processor_features
FEATURE_AVX10_1_512,
FEATURE_AVX10_2_256,
FEATURE_AVX10_2_512,
+ FEATURE_AMX_AVX512,
CPU_FEATURE_MAX
};
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
index a7c7e63..7ea852a 100644
--- a/gcc/common/config/i386/i386-isas.h
+++ b/gcc/common/config/i386/i386-isas.h
@@ -189,4 +189,6 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("avx10.2", FEATURE_AVX10_2_256, P_NONE, "-mavx10.2")
ISA_NAMES_TABLE_ENTRY("avx10.2-256", FEATURE_AVX10_2_256, P_NONE, "-mavx10.2-256")
ISA_NAMES_TABLE_ENTRY("avx10.2-512", FEATURE_AVX10_2_512, P_NONE, "-mavx10.2-512")
+ ISA_NAMES_TABLE_ENTRY("amx-avx512", FEATURE_AMX_AVX512, P_NONE,
+ "-mamx-avx512")
ISA_NAMES_TABLE_END
diff --git a/gcc/config.gcc b/gcc/config.gcc
index c3531e5..5d0240e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -458,7 +458,7 @@ i[34567]86-*-* | x86_64-*-*)
avx10_2bf16intrin.h avx10_2-512bf16intrin.h
avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
- avx10_2copyintrin.h"
+ avx10_2copyintrin.h amxavx512intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h
new file mode 100644
index 0000000..146a981
--- /dev/null
+++ b/gcc/config/i386/amxavx512intrin.h
@@ -0,0 +1,189 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXAVX512INTRIN_H_INCLUDED
+#define _AMXAVX512INTRIN_H_INCLUDED
+
+#if !defined(__AMX_AVX512__)
+#pragma GCC push_options
+#pragma GCC target("amx-avx512")
+#define __DISABLE_AMX_AVX512__
+#endif /* __AMX_AVX512__ */
+
+#if defined(__x86_64__)
+#define _tile_cvtrowd2ps_internal(src,A) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tcvtrowd2ps\t%1, %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowd2psi_internal(src,imm) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tcvtrowd2ps\t$"#imm", %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16h_internal(src,A) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16hi_internal(src,imm) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16l_internal(src,A) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16li_internal(src,imm) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phh_internal(src,A) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phh\t%1, %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phhi_internal(src,imm) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phh\t$"#imm", %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phl_internal(src,A) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phl\t%1, %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phli_internal(src,imm) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phl\t$"#imm", %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_movrow_internal(src,A) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tilemovrow\t%1, %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_movrowi_internal(src,imm) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tilemovrow\t$"#imm", %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowd2ps(src,A) \
+ _tile_cvtrowd2ps_internal (src,A)
+
+#define _tile_cvtrowd2psi(src,imm) \
+ _tile_cvtrowd2psi_internal (src,imm)
+
+#define _tile_cvtrowps2pbf16h(src,A) \
+ _tile_cvtrowps2pbf16h_internal (src,A)
+
+#define _tile_cvtrowps2pbf16hi(src,imm) \
+ _tile_cvtrowps2pbf16hi_internal (src,imm)
+
+#define _tile_cvtrowps2pbf16l(src,A) \
+ _tile_cvtrowps2pbf16l_internal (src,A)
+
+#define _tile_cvtrowps2pbf16li(src,imm) \
+ _tile_cvtrowps2pbf16li_internal (src,imm)
+
+#define _tile_cvtrowps2phh(src,A) \
+ _tile_cvtrowps2phh_internal (src,A)
+
+#define _tile_cvtrowps2phhi(src,imm) \
+ _tile_cvtrowps2phhi_internal (src,imm)
+
+#define _tile_cvtrowps2phl(src,A) \
+ _tile_cvtrowps2phl_internal (src,A)
+
+#define _tile_cvtrowps2phli(src,imm) \
+ _tile_cvtrowps2phli_internal (src,imm)
+
+#define _tile_movrow(src,A) \
+ _tile_movrow_internal (src,A)
+
+#define _tile_movrowi(src,imm) \
+ _tile_movrowi_internal (src,imm)
+
+#endif
+
+#ifdef __DISABLE_AMX_AVX512__
+#undef __DISABLE_AMX_AVX512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_AVX512__ */
+
+#endif /* _AMXAVX512INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index a75ba2b..2fc163b 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -162,6 +162,10 @@
#define bit_AESKLE ( 1<<0 )
#define bit_WIDEKL ( 1<<2 )
+/* AMX sub leaf (%eax == 0x1e, %ecx == 1) */
+/* %eax */
+#define bit_AMX_AVX512 (1 << 7)
+
/* AVX10 sub leaf (%eax == 0x24) */
/* %ebx */
#define bit_AVX10_256 (1 << 17)
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 72435fe..1c36beb 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -741,6 +741,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX10_2_256__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10_2_512)
def_or_undef (parse_in, "__AVX10_2_512__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_AVX512)
+ def_or_undef (parse_in, "__AMX_AVX512__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index bfb33ba..fcc3bc4 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -123,3 +123,4 @@ DEF_PTA(AVX10_1_256)
DEF_PTA(AVX10_1_512)
DEF_PTA(AVX10_2_256)
DEF_PTA(AVX10_2_512)
+DEF_PTA(AMX_AVX512)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 38037de..2f6646f 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -263,7 +263,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 },
{ "-mavx10.1-512", OPTION_MASK_ISA2_AVX10_1_512 },
{ "-mavx10.2-256", OPTION_MASK_ISA2_AVX10_2_256 },
- { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 }
+ { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 },
+ { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1132,6 +1133,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2_256),
IX86_ATTR_ISA ("avx10.2-256", OPT_mavx10_2_256),
IX86_ATTR_ISA ("avx10.2-512", OPT_mavx10_2_512),
+ IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 64c295d..232daff 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1389,3 +1389,8 @@ mavx10.2
Target Alias(mavx10.2-256)
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
AVX10.1 and AVX10.2 built-in functions and code generation.
+
+mamx-avx512
+Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512,
+AVX10.2-512 and AMX-AVX512 built-in functions and code generation.
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index fc70616..9f590f6 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -613,3 +613,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2-512)
mavx10.2
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2)
+mamx-avx512
+UrlSuffix(gcc/x86-Options.html#index-mamx-avx512)
+
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 6b8035e..772af56 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -132,6 +132,8 @@
#include <amxcomplexintrin.h>
+#include <amxavx512intrin.h>
+
#include <prfchwintrin.h>
#include <keylockerintrin.h>
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f97e008..d2b3086 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7544,6 +7544,11 @@ Enable/disbale the generation of the AVX10.2 instructions.
@itemx no-avx10.2-512
Enable/disable the generation of the AVX10.2 512 bit instructions.
+@cindex @code{target("amx-avx512")} function attribute, x86
+@item amx-avx512
+@itemx no-amx-avx512
+Enable/disable the generation of the AMX-AVX512 instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 19c148a..1186bdd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1486,7 +1486,7 @@ See RS/6000 and PowerPC Options.
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf
-musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256
--mavx10.2-512
+-mavx10.2-512 -mamx-avx512
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@@ -35674,6 +35674,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@opindex mavx10.2-512
@itemx -mavx10.2-512
+@need 200
+@opindex mamx-avx512
+@itemx -mamx-avx512
These switches enable the use of instructions in the MMX, SSE,
AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES,
PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
@@ -35684,9 +35687,9 @@ WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD,
AVX512VPOPCNTDQ, AVX512VNNI, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8,
AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT,
CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512,
-SM4, APX_F, USER_MSR, AVX10.1, AVX10.2 or CLDEMOTE extended instruction sets.
-Each has a corresponding @option{-mno-} option to disable use of these
-instructions.
+SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512 or CLDEMOTE extended
+instruction sets. Each has a corresponding @option{-mno-} option to disable
+use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 38275fd..0dfbc57 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2644,6 +2644,9 @@ Target supports the execution of @code{amx-int8} instructions.
@item amx_bf16
Target supports the execution of @code{amx-bf16} instructions.
+@item amx_avx512
+Target supports the execution of @code{amx-avx512} instructions.
+
@item amx_complex
Target supports the execution of @code{amx-complex} instructions.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index d0492dc..8e872f7 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 3bfc839..133e64f 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
index f1a04cf..a336392 100644
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
@@ -219,6 +219,9 @@ main ()
#ifdef AMX_COMPLEX
&& __builtin_cpu_supports ("amx-complex")
#endif
+#ifdef AMX_AVX512
+ && __builtin_cpu_supports ("amx-avx512")
+#endif
#ifdef __linux__
&& request_perm_xtile_data ()
#endif
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
index 6ed9f5e..847882d 100644
--- a/gcc/testsuite/gcc.target/i386/amx-helper.h
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
@@ -1,9 +1,7 @@
#ifndef AMX_HELPER_H_INCLUDED
#define AMX_HELPER_H_INCLUDED
-#if defined(AMX_FP16) || defined(AMX_COMPLEX)
#include <immintrin.h>
#include <xmmintrin.h>
-#endif
#include "amx-check.h"
typedef union
@@ -12,7 +10,25 @@ typedef union
uint16_t u;
} union16f_uw;
-#if defined(AMX_FP16) || defined(AMX_COMPLEX)
+typedef union
+{
+ __bf16 bf16;
+ uint16_t u;
+} union16bh_uw;
+
+typedef union
+{
+ float f;
+ uint32_t u;
+} union32f_ud;
+
+typedef union
+{
+ __m512 m;
+ uint8_t u[64];
+} union512_ub;
+
+#if defined(AMX_FP16) || defined(AMX_COMPLEX) || defined (AMX_AVX512)
/* Transformation functions between fp16/float */
static uint16_t make_f32_fp16 (float f)
{
@@ -58,4 +74,87 @@ void init_fp16_max_tile_zero_buffer (uint8_t* buf)
}
#endif
+#if defined (AMX_AVX512)
+/* Transformation functions between bf16/float */
+static uint16_t make_f32_bf16 (float f)
+{
+ union16bh_uw tmp;
+ tmp.bf16 = (__bf16) f;
+ return tmp.u;
+}
+
+static float make_bf16_f32 (uint16_t bf)
+{
+ union16bh_uw tmp;
+ tmp.u = bf;
+ return _mm_cvtsbh_ss (tmp.bf16);
+}
+
+/* Init tile buffer with bf16 pairs */
+void init_bf16_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ uint16_t* ptr = (uint16_t *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 32; j++)
+ {
+ float f = 2.5f * i + 1.25f * j;
+ ptr[i * 32 + j] = make_f32_bf16 (f);
+ }
+}
+#endif
+
+/* Init tile buffer with fp32 */
+void init_fp32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ float* ptr = (float *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2.5f * i + 1.25f * j;
+}
+
+/* Init tile buffer with int32 */
+void init_int32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ uint32_t *ptr = (uint32_t *)buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = (uint32_t) (3 * j - 16 * i);
+}
+
+#define COMPARE_ZMM(A, B) \
+for (int j = 0; j < 16; j++) \
+{ \
+ union32f_ud fu1, fu2; \
+ fu1.f = A[j]; \
+ fu2.f = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort (); \
+}
+
+#define COMPARE_ZMM_BF16(A, B) \
+for (int j = 0; j < 32; j++) \
+{ \
+ union16bh_uw fu1, fu2; \
+ fu1.bf16 = A[j]; \
+ fu2.bf16 = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort(); \
+}
+
+#define COMPARE_ZMM_FP16(A, B) \
+for (int j = 0; j < 32; j++) \
+{ \
+ union16f_uw fu1, fu2; \
+ fu1.f16 = A[j]; \
+ fu2.f16 = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort(); \
+}
+
#endif
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
new file mode 100644
index 0000000..497218d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */
+#include <immintrin.h>
+
+#define TMM1 1
+
+__m512 a;
+__m512bh b;
+__m512h c;
+
+void TEST ()
+{
+ a = _tile_cvtrowd2ps (TMM1, 1);
+ a = _tile_cvtrowd2psi (TMM1, 2);
+ b = _tile_cvtrowps2pbf16h (TMM1, 3);
+ b = _tile_cvtrowps2pbf16hi (TMM1, 4);
+ b = _tile_cvtrowps2pbf16l (TMM1, 5);
+ b = _tile_cvtrowps2pbf16li (TMM1, 6);
+ c = _tile_cvtrowps2phh (TMM1, 7);
+ c = _tile_cvtrowps2phhi (TMM1, 8);
+ c = _tile_cvtrowps2phl (TMM1, 9);
+ c = _tile_cvtrowps2phli (TMM1, 10);
+ a = _tile_movrow (TMM1, 11);
+ a = _tile_movrowi (TMM1, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
new file mode 100644
index 0000000..4011043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */
+/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+#include <immintrin.h>
+
+__m512 a;
+__m512bh b;
+__m512h c;
+
+void TEST ()
+{
+ a = _tile_cvtrowd2ps (1, 1);
+ a = _tile_cvtrowd2psi (1, 2);
+ b = _tile_cvtrowps2pbf16h (1, 3);
+ b = _tile_cvtrowps2pbf16hi (1, 4);
+ b = _tile_cvtrowps2pbf16l (1, 5);
+ b = _tile_cvtrowps2pbf16li (1, 6);
+ c = _tile_cvtrowps2phh (1, 7);
+ c = _tile_cvtrowps2phhi (1, 8);
+ c = _tile_cvtrowps2phl (1, 9);
+ c = _tile_cvtrowps2phli (1, 10);
+ a = _tile_movrow (1, 11);
+ a = _tile_movrowi (1, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c
new file mode 100644
index 0000000..cfd5644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowd2ps
+void test_amx_avx512_cvtrowd2ps();
+#include "amx-helper.h"
+
+volatile __m512 cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWD2PS(EI, T) \
+__m512 \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowd2ps##EI (__tile *src, T __A) \
+{ \
+ uint32_t *src_buf = (uint32_t *)src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, j; \
+ __m512 res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ res[j] = 0; \
+ else \
+ res[j] = (float) (int) src_buf[row_index * N + j + row_chunk / 4]; \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWD2PS(e, unsigned)
+DEFINE_TEST_CVTROWD2PS(i, const unsigned)
+
+#define TEST_CVTROWD2PS(X, Y, EI, T, INTRIN) \
+cal_dst = calc_cvtrowd2ps##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowd2ps()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_int32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWD2PS (&src, a, e, unsigned, cvtrowd2ps);
+ TEST_CVTROWD2PS (&src, 1, i, const unsigned, cvtrowd2psi);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
new file mode 100644
index 0000000..dfd1d6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowps2pbf16
+void test_amx_avx512_cvtrowps2pbf16();
+#include "amx-helper.h"
+
+volatile __m512bh cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \
+__m512bh \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \
+{ \
+ float *src_buf = (float *) src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, zeropos, pos, j, k; \
+ __m512bh res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ if ((#HL) == "h") \
+ { \
+ zeropos = 0; \
+ pos = 1; \
+ } \
+ else \
+ { \
+ zeropos = 1; \
+ pos = 0; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ for (k = 0; k < 2; k++) \
+ res[2 * j + k] = 0; \
+ else \
+ { \
+ union16bh_uw tmp; \
+ tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \
+ res[2 * j + pos] = tmp.bf16; \
+ res[2 * j + zeropos] = (__bf16) 0; \
+ } \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned)
+
+#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \
+cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM_BF16(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowps2pbf16 ()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_fp32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h);
+ TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l);
+ TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi);
+ TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c
new file mode 100644
index 0000000..1fd28de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowps2ph
+void test_amx_avx512_cvtrowps2ph();
+#include "amx-helper.h"
+
+volatile __m512h cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWPS2PH(HL, EI, T) \
+__m512h \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowps2ph##HL##EI (__tile *src, T __A) \
+{ \
+ float *src_buf = (float *) src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, zeropos, pos, j, k; \
+ __m512h res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ if ((#HL) == "h") \
+ { \
+ zeropos = 0; \
+ pos = 1; \
+ } \
+ else \
+ { \
+ zeropos = 1; \
+ pos = 0; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ for (k = 0; k < 2; k++) \
+ res[2 * j + k] = 0; \
+ else \
+ { \
+ union16f_uw tmp; \
+ tmp.u = make_f32_fp16 (src_buf[row_index * N + j + row_chunk / 4]); \
+ res[2 * j + zeropos] = 0; \
+ res[2 * j + pos] = tmp.f16; \
+ } \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWPS2PH(h, e, unsigned)
+DEFINE_TEST_CVTROWPS2PH(l, e, unsigned)
+DEFINE_TEST_CVTROWPS2PH(h, i, const unsigned)
+DEFINE_TEST_CVTROWPS2PH(l, i, const unsigned)
+
+#define TEST_CVTROWPS2PH(X, Y, HL, EI, T, INTRIN) \
+cal_dst = calc_cvtrowps2ph##HL##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM_FP16(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowps2ph ()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_fp32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWPS2PH (&src, a, h, e, unsigned, cvtrowps2phh);
+ TEST_CVTROWPS2PH (&src, a, l, e, unsigned, cvtrowps2phl);
+ TEST_CVTROWPS2PH (&src, 1, h, i, const unsigned, cvtrowps2phhi);
+ TEST_CVTROWPS2PH (&src, 1, l, i, const unsigned, cvtrowps2phli);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c
new file mode 100644
index 0000000..ea28d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_movrow
+void test_amx_avx512_movrow();
+#include "amx-helper.h"
+
+int j, k;
+volatile __m512 cal_dst, cmp_dst;
+
+#define TEST_MOVROW(X, Y, EI, T, INTRIN) \
+__m512 \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_movrow##EI (__tile *src, T __A) \
+{ \
+ uint8_t *src_buf = (uint8_t *)src->buf; \
+ int N = src->colsb; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk; \
+ __m512 res; \
+ if ((EI) == 'e') \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ union512_ub tmp; \
+ for (j = 0; j < vl_bytes; j++) \
+ if (j + row_chunk >= N) \
+ tmp.u[j] = 0; \
+ else \
+ tmp.u[j] = src_buf[row_index * N + j + row_chunk]; \
+ res = tmp.m; \
+ return res; \
+} \
+cal_dst = calc_movrow##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM(cal_dst, cmp_dst);
+
+void test_amx_avx512_movrow()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ unsigned a = 2;
+ char e = 'e', i = 'i';
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src (1, src);
+
+ TEST_MOVROW (&src, a, e, unsigned, movrow);
+ TEST_MOVROW (&src, 1, i, const unsigned, movrowi);
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 0852e53..b4ffc5f 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -89,6 +89,7 @@ extern void test_sm4 (void) __attribute__((__target__("sm4")
extern void test_user_msr (void) __attribute__((__target__("usermsr")));
extern void test_avx10_2 (void) __attribute__((__target__("avx10.2")));
extern void test_avx10_2_512 (void) __attribute__((__target__("avx10.2-512")));
+extern void test_amx_avx512 (void) __attribute__((__target__("amx-avx512")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq")));
@@ -179,6 +180,7 @@ extern void test_no_sm4 (void) __attribute__((__target__("no-sm
extern void test_no_user_msr (void) __attribute__((__target__("no-usermsr")));
extern void test_no_avx10_2 (void) __attribute__((__target__("no-avx10.2")));
extern void test_no_avx10_2_512 (void) __attribute__((__target__("no-avx10.2-512")));
+extern void test_no_amx_avx512 (void) __attribute__((__target__("no-amx-avx512")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index fbc39c5..3349ce0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index b32a5d75..9725cfe 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 4662c86..13e636c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 229e2f7..7c43c06 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#endif
/* Following intrinsics require immediate arguments. They
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index f0e2054..76e0d8d 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -1082,6 +1082,6 @@
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#include <x86intrin.h>
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5638e45..3b18269 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -10775,6 +10775,17 @@ proc check_effective_target_avx10_2_512 { } {
} "-mavx10.2-512" ]
}
+# Return 1 if amx-avx512 instructions can be compiled.
+proc check_effective_target_amx_avx512 { } {
+ return [check_no_compiler_messages amx_avx512 object {
+ void
+ foo ()
+ {
+ __asm__ volatile ("tilemovrow\t%%edx, %%tmm2, %%zmm1" ::);
+ }
+ } "-mamx-avx512" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {