aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target
diff options
context:
space:
mode:
authorHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:04:34 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:09:36 +0800
commit343f8113385d00e9ffac53150bca4f78be30e19c (patch)
treeb2838f3b54b157cc20dc0f8f349ef924b3fa1c9e /gcc/testsuite/gcc.target
parent8ee5cd4b84489bee0f72153e96a9afe9493e170d (diff)
downloadgcc-343f8113385d00e9ffac53150bca4f78be30e19c.zip
gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.gz
gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.bz2
Support Intel AMX-AVX512
gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_available_features): Detect AMX-AVX512. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_AVX512_SET, OPTION_MASK_ISA2_AMX_AVX512_UNSET): New. (ix86_handle_option): Handle -mamx-avx512. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AMX_AVX512. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for amx-avx512. * config.gcc: Add amxavx512intrin.h * config/i386/cpuid.h (bit_AMX_AVX512): New. * config/i386/i386-c.cc (ix86_target_macros_internal): Handle amx-avx512. * config/i386/i386-isa.def (AMX_AVX512): Add DEF_PTA(AMX_AVX512). * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p): Handle amx-avx512. * config/i386/i386.opt: Add option -mamx-avx512. * config/i386/i386.opt.urls: Regenerated. * config/i386/immintrin.h: Include amxavx512intrin.h * doc/extend.texi: Document amx-avx512. * doc/invoke.texi: Document -mamx-avx512. * doc/sourcebuild.texi: Document target amx-avx512. * config/i386/amxavx512intrin.h: New file. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Add -mamx-avx512. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/amx-check.h: Add cpu check for AMX-AVX512. * gcc.target/i386/amx-helper.h: Support amx-avx512. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-12.c: Add -mamx-avx512. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Add amx-avx512. * gcc.target/i386/sse-23.c: Ditto. * lib/target-supports.exp (check_effective_target_amx_avx512): New. * gcc.target/i386/amxavx512-asmatt-1.c: New test. * gcc.target/i386/amxavx512-asmintel-1.c: Ditto. * gcc.target/i386/amxavx512-cvtrowd2ps-2.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2ph-2.c: Ditto. * gcc.target/i386/amxavx512-movrow-2.c: Ditto. Co-authored-by: Yu, Bing <bing1.yu@intel.com>
Diffstat (limited to 'gcc/testsuite/gcc.target')
-rw-r--r--gcc/testsuite/gcc.target/i386/amx-check.h3
-rw-r--r--gcc/testsuite/gcc.target/i386/amx-helper.h105
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/funcspec-56.inc2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c2
14 files changed, 459 insertions, 9 deletions
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
index f1a04cf..a336392 100644
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
@@ -219,6 +219,9 @@ main ()
#ifdef AMX_COMPLEX
&& __builtin_cpu_supports ("amx-complex")
#endif
+#ifdef AMX_AVX512
+ && __builtin_cpu_supports ("amx-avx512")
+#endif
#ifdef __linux__
&& request_perm_xtile_data ()
#endif
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
index 6ed9f5e..847882d 100644
--- a/gcc/testsuite/gcc.target/i386/amx-helper.h
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
@@ -1,9 +1,7 @@
#ifndef AMX_HELPER_H_INCLUDED
#define AMX_HELPER_H_INCLUDED
-#if defined(AMX_FP16) || defined(AMX_COMPLEX)
#include <immintrin.h>
#include <xmmintrin.h>
-#endif
#include "amx-check.h"
typedef union
@@ -12,7 +10,25 @@ typedef union
uint16_t u;
} union16f_uw;
-#if defined(AMX_FP16) || defined(AMX_COMPLEX)
+typedef union
+{
+ __bf16 bf16;
+ uint16_t u;
+} union16bh_uw;
+
+typedef union
+{
+ float f;
+ uint32_t u;
+} union32f_ud;
+
+typedef union
+{
+ __m512 m;
+ uint8_t u[64];
+} union512_ub;
+
+#if defined(AMX_FP16) || defined(AMX_COMPLEX) || defined (AMX_AVX512)
/* Transformation functions between fp16/float */
static uint16_t make_f32_fp16 (float f)
{
@@ -58,4 +74,87 @@ void init_fp16_max_tile_zero_buffer (uint8_t* buf)
}
#endif
+#if defined (AMX_AVX512)
+/* Transformation functions between bf16/float */
+static uint16_t make_f32_bf16 (float f)
+{
+ union16bh_uw tmp;
+ tmp.bf16 = (__bf16) f;
+ return tmp.u;
+}
+
+static float make_bf16_f32 (uint16_t bf)
+{
+ union16bh_uw tmp;
+ tmp.u = bf;
+ return _mm_cvtsbh_ss (tmp.bf16);
+}
+
+/* Init tile buffer with bf16 pairs */
+void init_bf16_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ uint16_t* ptr = (uint16_t *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 32; j++)
+ {
+ float f = 2.5f * i + 1.25f * j;
+ ptr[i * 32 + j] = make_f32_bf16 (f);
+ }
+}
+#endif
+
+/* Init tile buffer with fp32 */
+void init_fp32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ float* ptr = (float *) buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = 2.5f * i + 1.25f * j;
+}
+
+/* Init tile buffer with int32 */
+void init_int32_max_tile_buffer (uint8_t *buf)
+{
+ int i, j;
+ uint32_t *ptr = (uint32_t *)buf;
+
+ for (i = 0; i < 16; i++)
+ for (j = 0; j < 16; j++)
+ ptr[i * 16 + j] = (uint32_t) (3 * j - 16 * i);
+}
+
+#define COMPARE_ZMM(A, B) \
+for (int j = 0; j < 16; j++) \
+{ \
+ union32f_ud fu1, fu2; \
+ fu1.f = A[j]; \
+ fu2.f = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort (); \
+}
+
+#define COMPARE_ZMM_BF16(A, B) \
+for (int j = 0; j < 32; j++) \
+{ \
+ union16bh_uw fu1, fu2; \
+ fu1.bf16 = A[j]; \
+ fu2.bf16 = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort(); \
+}
+
+#define COMPARE_ZMM_FP16(A, B) \
+for (int j = 0; j < 32; j++) \
+{ \
+ union16f_uw fu1, fu2; \
+ fu1.f16 = A[j]; \
+ fu2.f16 = B[j]; \
+ if (fu1.u != fu2.u) \
+ abort(); \
+}
+
#endif
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
new file mode 100644
index 0000000..497218d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */
+/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */
+#include <immintrin.h>
+
+#define TMM1 1
+
+__m512 a;
+__m512bh b;
+__m512h c;
+
+void TEST ()
+{
+ a = _tile_cvtrowd2ps (TMM1, 1);
+ a = _tile_cvtrowd2psi (TMM1, 2);
+ b = _tile_cvtrowps2pbf16h (TMM1, 3);
+ b = _tile_cvtrowps2pbf16hi (TMM1, 4);
+ b = _tile_cvtrowps2pbf16l (TMM1, 5);
+ b = _tile_cvtrowps2pbf16li (TMM1, 6);
+ c = _tile_cvtrowps2phh (TMM1, 7);
+ c = _tile_cvtrowps2phhi (TMM1, 8);
+ c = _tile_cvtrowps2phl (TMM1, 9);
+ c = _tile_cvtrowps2phli (TMM1, 10);
+ a = _tile_movrow (TMM1, 11);
+ a = _tile_movrowi (TMM1, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
new file mode 100644
index 0000000..4011043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */
+/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
+#include <immintrin.h>
+
+__m512 a;
+__m512bh b;
+__m512h c;
+
+void TEST ()
+{
+ a = _tile_cvtrowd2ps (1, 1);
+ a = _tile_cvtrowd2psi (1, 2);
+ b = _tile_cvtrowps2pbf16h (1, 3);
+ b = _tile_cvtrowps2pbf16hi (1, 4);
+ b = _tile_cvtrowps2pbf16l (1, 5);
+ b = _tile_cvtrowps2pbf16li (1, 6);
+ c = _tile_cvtrowps2phh (1, 7);
+ c = _tile_cvtrowps2phhi (1, 8);
+ c = _tile_cvtrowps2phl (1, 9);
+ c = _tile_cvtrowps2phli (1, 10);
+ a = _tile_movrow (1, 11);
+ a = _tile_movrowi (1, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c
new file mode 100644
index 0000000..cfd5644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowd2ps
+void test_amx_avx512_cvtrowd2ps();
+#include "amx-helper.h"
+
+volatile __m512 cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWD2PS(EI, T) \
+__m512 \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowd2ps##EI (__tile *src, T __A) \
+{ \
+ uint32_t *src_buf = (uint32_t *)src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, j; \
+ __m512 res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ res[j] = 0; \
+ else \
+ res[j] = (float) (int) src_buf[row_index * N + j + row_chunk / 4]; \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWD2PS(e, unsigned)
+DEFINE_TEST_CVTROWD2PS(i, const unsigned)
+
+#define TEST_CVTROWD2PS(X, Y, EI, T, INTRIN) \
+cal_dst = calc_cvtrowd2ps##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowd2ps()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_int32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWD2PS (&src, a, e, unsigned, cvtrowd2ps);
+ TEST_CVTROWD2PS (&src, 1, i, const unsigned, cvtrowd2psi);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
new file mode 100644
index 0000000..dfd1d6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowps2pbf16
+void test_amx_avx512_cvtrowps2pbf16();
+#include "amx-helper.h"
+
+volatile __m512bh cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \
+__m512bh \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \
+{ \
+ float *src_buf = (float *) src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, zeropos, pos, j, k; \
+ __m512bh res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ if ((#HL) == "h") \
+ { \
+ zeropos = 0; \
+ pos = 1; \
+ } \
+ else \
+ { \
+ zeropos = 1; \
+ pos = 0; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ for (k = 0; k < 2; k++) \
+ res[2 * j + k] = 0; \
+ else \
+ { \
+ union16bh_uw tmp; \
+ tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \
+ res[2 * j + pos] = tmp.bf16; \
+ res[2 * j + zeropos] = (__bf16) 0; \
+ } \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned)
+DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned)
+
+#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \
+cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM_BF16(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowps2pbf16 ()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_fp32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h);
+ TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l);
+ TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi);
+ TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c
new file mode 100644
index 0000000..1fd28de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_cvtrowps2ph
+void test_amx_avx512_cvtrowps2ph();
+#include "amx-helper.h"
+
+volatile __m512h cal_dst, cmp_dst;
+
+#define DEFINE_TEST_CVTROWPS2PH(HL, EI, T) \
+__m512h \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_cvtrowps2ph##HL##EI (__tile *src, T __A) \
+{ \
+ float *src_buf = (float *) src->buf; \
+ int N = src->colsb / 4; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk, zeropos, pos, j, k; \
+ __m512h res; \
+ if ((#EI) == "e") \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ if ((#HL) == "h") \
+ { \
+ zeropos = 0; \
+ pos = 1; \
+ } \
+ else \
+ { \
+ zeropos = 1; \
+ pos = 0; \
+ } \
+ for (j = 0; j < vl_bytes / 4; j++) \
+ if (j + row_chunk / 4 >= N) \
+ for (k = 0; k < 2; k++) \
+ res[2 * j + k] = 0; \
+ else \
+ { \
+ union16f_uw tmp; \
+ tmp.u = make_f32_fp16 (src_buf[row_index * N + j + row_chunk / 4]); \
+ res[2 * j + zeropos] = 0; \
+ res[2 * j + pos] = tmp.f16; \
+ } \
+ return res; \
+}
+
+DEFINE_TEST_CVTROWPS2PH(h, e, unsigned)
+DEFINE_TEST_CVTROWPS2PH(l, e, unsigned)
+DEFINE_TEST_CVTROWPS2PH(h, i, const unsigned)
+DEFINE_TEST_CVTROWPS2PH(l, i, const unsigned)
+
+#define TEST_CVTROWPS2PH(X, Y, HL, EI, T, INTRIN) \
+cal_dst = calc_cvtrowps2ph##HL##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM_FP16(cal_dst, cmp_dst);
+
+void test_amx_avx512_cvtrowps2ph ()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ uint8_t tmp_dst_buf[1024];
+ unsigned a = 2;
+
+ init_fp32_max_tile_buffer (tmp_dst_buf);
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
+
+ TEST_CVTROWPS2PH (&src, a, h, e, unsigned, cvtrowps2phh);
+ TEST_CVTROWPS2PH (&src, a, l, e, unsigned, cvtrowps2phl);
+ TEST_CVTROWPS2PH (&src, 1, h, i, const unsigned, cvtrowps2phhi);
+ TEST_CVTROWPS2PH (&src, 1, l, i, const unsigned, cvtrowps2phli);
+}
diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c
new file mode 100644
index 0000000..ea28d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target amx_avx512 } */
+/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
+#define AMX_AVX512
+#define DO_TEST test_amx_avx512_movrow
+void test_amx_avx512_movrow();
+#include "amx-helper.h"
+
+int j, k;
+volatile __m512 cal_dst, cmp_dst;
+
+#define TEST_MOVROW(X, Y, EI, T, INTRIN) \
+__m512 \
+__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
+calc_movrow##EI (__tile *src, T __A) \
+{ \
+ uint8_t *src_buf = (uint8_t *)src->buf; \
+ int N = src->colsb; \
+ int vl = 512; \
+ int vl_bytes = vl >> 3; \
+ int row_index, row_chunk; \
+ __m512 res; \
+ if ((EI) == 'e') \
+ { \
+ row_index = (__A) & 0xffff; \
+ row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \
+ } \
+ else \
+ { \
+ row_index = (__A) & 0x3f; \
+ row_chunk = ((__A) >> 6) * vl_bytes; \
+ } \
+ union512_ub tmp; \
+ for (j = 0; j < vl_bytes; j++) \
+ if (j + row_chunk >= N) \
+ tmp.u[j] = 0; \
+ else \
+ tmp.u[j] = src_buf[row_index * N + j + row_chunk]; \
+ res = tmp.m; \
+ return res; \
+} \
+cal_dst = calc_movrow##EI (X, Y); \
+cmp_dst = _tile_##INTRIN (1, Y); \
+COMPARE_ZMM(cal_dst, cmp_dst);
+
+void test_amx_avx512_movrow()
+{
+ __tilecfg_u cfg;
+ __tile src;
+ unsigned a = 2;
+ char e = 'e', i = 'i';
+
+ init_tile_config (&cfg);
+ init_tile_reg_and_src (1, src);
+
+ TEST_MOVROW (&src, a, e, unsigned, movrow);
+ TEST_MOVROW (&src, 1, i, const unsigned, movrowi);
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 0852e53..b4ffc5f 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -89,6 +89,7 @@ extern void test_sm4 (void) __attribute__((__target__("sm4")
extern void test_user_msr (void) __attribute__((__target__("usermsr")));
extern void test_avx10_2 (void) __attribute__((__target__("avx10.2")));
extern void test_avx10_2_512 (void) __attribute__((__target__("avx10.2-512")));
+extern void test_amx_avx512 (void) __attribute__((__target__("amx-avx512")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq")));
@@ -179,6 +180,7 @@ extern void test_no_sm4 (void) __attribute__((__target__("no-sm
extern void test_no_user_msr (void) __attribute__((__target__("no-usermsr")));
extern void test_no_avx10_2 (void) __attribute__((__target__("no-avx10.2")));
extern void test_no_avx10_2_512 (void) __attribute__((__target__("no-avx10.2-512")));
+extern void test_no_amx_avx512 (void) __attribute__((__target__("no-amx-avx512")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index fbc39c5..3349ce0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index b32a5d75..9725cfe 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 4662c86..13e636c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 229e2f7..7c43c06 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#endif
/* Following intrinsics require immediate arguments. They
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index f0e2054..76e0d8d 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -1082,6 +1082,6 @@
#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512")
#include <x86intrin.h>