diff options
author | Haochen Jiang <haochen.jiang@intel.com> | 2024-11-01 10:04:34 +0800 |
---|---|---|
committer | Haochen Jiang <haochen.jiang@intel.com> | 2024-11-01 10:09:36 +0800 |
commit | 343f8113385d00e9ffac53150bca4f78be30e19c (patch) | |
tree | b2838f3b54b157cc20dc0f8f349ef924b3fa1c9e /gcc/testsuite/gcc.target | |
parent | 8ee5cd4b84489bee0f72153e96a9afe9493e170d (diff) | |
download | gcc-343f8113385d00e9ffac53150bca4f78be30e19c.zip gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.gz gcc-343f8113385d00e9ffac53150bca4f78be30e19c.tar.bz2 |
Support Intel AMX-AVX512
gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_available_features):
Detect AMX-AVX512.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_AVX512_SET,
OPTION_MASK_ISA2_AMX_AVX512_UNSET): New.
(ix86_handle_option): Handle -mamx-avx512.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_AVX512.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-avx512.
* config.gcc: Add amxavx512intrin.h
* config/i386/cpuid.h (bit_AMX_AVX512): New.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Handle amx-avx512.
* config/i386/i386-isa.def (AMX_AVX512): Add DEF_PTA(AMX_AVX512).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle amx-avx512.
* config/i386/i386.opt: Add option -mamx-avx512.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include amxavx512intrin.h
* doc/extend.texi: Document amx-avx512.
* doc/invoke.texi: Document -mamx-avx512.
* doc/sourcebuild.texi: Document target amx-avx512.
* config/i386/amxavx512intrin.h: New file.
gcc/testsuite/ChangeLog:
* g++.dg/other/i386-2.C: Add -mamx-avx512.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Add cpu check for AMX-AVX512.
* gcc.target/i386/amx-helper.h: Support amx-avx512.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-avx512.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add amx-avx512.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_avx512): New.
* gcc.target/i386/amxavx512-asmatt-1.c: New test.
* gcc.target/i386/amxavx512-asmintel-1.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowd2ps-2.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowps2ph-2.c: Ditto.
* gcc.target/i386/amxavx512-movrow-2.c: Ditto.
Co-authored-by: Yu, Bing <bing1.yu@intel.com>
Diffstat (limited to 'gcc/testsuite/gcc.target')
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amx-check.h | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amx-helper.h | 105 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c | 82 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c | 82 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c | 59 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-12.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-13.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-14.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-22.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 2 |
14 files changed, 459 insertions, 9 deletions
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h index f1a04cf..a336392 100644 --- a/gcc/testsuite/gcc.target/i386/amx-check.h +++ b/gcc/testsuite/gcc.target/i386/amx-check.h @@ -219,6 +219,9 @@ main () #ifdef AMX_COMPLEX && __builtin_cpu_supports ("amx-complex") #endif +#ifdef AMX_AVX512 + && __builtin_cpu_supports ("amx-avx512") +#endif #ifdef __linux__ && request_perm_xtile_data () #endif diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h index 6ed9f5e..847882d 100644 --- a/gcc/testsuite/gcc.target/i386/amx-helper.h +++ b/gcc/testsuite/gcc.target/i386/amx-helper.h @@ -1,9 +1,7 @@ #ifndef AMX_HELPER_H_INCLUDED #define AMX_HELPER_H_INCLUDED -#if defined(AMX_FP16) || defined(AMX_COMPLEX) #include <immintrin.h> #include <xmmintrin.h> -#endif #include "amx-check.h" typedef union @@ -12,7 +10,25 @@ typedef union uint16_t u; } union16f_uw; -#if defined(AMX_FP16) || defined(AMX_COMPLEX) +typedef union +{ + __bf16 bf16; + uint16_t u; +} union16bh_uw; + +typedef union +{ + float f; + uint32_t u; +} union32f_ud; + +typedef union +{ + __m512 m; + uint8_t u[64]; +} union512_ub; + +#if defined(AMX_FP16) || defined(AMX_COMPLEX) || defined (AMX_AVX512) /* Transformation functions between fp16/float */ static uint16_t make_f32_fp16 (float f) { @@ -58,4 +74,87 @@ void init_fp16_max_tile_zero_buffer (uint8_t* buf) } #endif +#if defined (AMX_AVX512) +/* Transformation functions between bf16/float */ +static uint16_t make_f32_bf16 (float f) +{ + union16bh_uw tmp; + tmp.bf16 = (__bf16) f; + return tmp.u; +} + +static float make_bf16_f32 (uint16_t bf) +{ + union16bh_uw tmp; + tmp.u = bf; + return _mm_cvtsbh_ss (tmp.bf16); +} + +/* Init tile buffer with bf16 pairs */ +void init_bf16_max_tile_buffer (uint8_t *buf) +{ + int i, j; + uint16_t* ptr = (uint16_t *) buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 32; j++) + { + float f = 2.5f * i + 1.25f * j; + ptr[i * 32 + j] = make_f32_bf16 (f); + } +} +#endif + +/* Init tile buffer with fp32 */ +void init_fp32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + float* ptr = (float *) buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2.5f * i + 1.25f * j; +} + +/* Init tile buffer with int32 */ +void init_int32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + uint32_t *ptr = (uint32_t *)buf; + + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = (uint32_t) (3 * j - 16 * i); +} + +#define COMPARE_ZMM(A, B) \ +for (int j = 0; j < 16; j++) \ +{ \ + union32f_ud fu1, fu2; \ + fu1.f = A[j]; \ + fu2.f = B[j]; \ + if (fu1.u != fu2.u) \ + abort (); \ +} + +#define COMPARE_ZMM_BF16(A, B) \ +for (int j = 0; j < 32; j++) \ +{ \ + union16bh_uw fu1, fu2; \ + fu1.bf16 = A[j]; \ + fu2.bf16 = B[j]; \ + if (fu1.u != fu2.u) \ + abort(); \ +} + +#define COMPARE_ZMM_FP16(A, B) \ +for (int j = 0; j < 32; j++) \ +{ \ + union16f_uw fu1, fu2; \ + fu1.f16 = A[j]; \ + fu2.f16 = B[j]; \ + if (fu1.u != fu2.u) \ + abort(); \ +} + #endif diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c new file mode 100644 index 0000000..497218d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */ +#include <immintrin.h> + +#define TMM1 1 + +__m512 a; +__m512bh b; +__m512h c; + +void TEST () +{ + a = _tile_cvtrowd2ps (TMM1, 1); + a = _tile_cvtrowd2psi (TMM1, 2); + b = _tile_cvtrowps2pbf16h (TMM1, 3); + b = _tile_cvtrowps2pbf16hi (TMM1, 4); + b = _tile_cvtrowps2pbf16l (TMM1, 5); + b = _tile_cvtrowps2pbf16li (TMM1, 6); + c = _tile_cvtrowps2phh (TMM1, 7); + c = _tile_cvtrowps2phhi (TMM1, 8); + c = _tile_cvtrowps2phl (TMM1, 9); + c = _tile_cvtrowps2phli (TMM1, 10); + a = _tile_movrow (TMM1, 11); + a = _tile_movrowi (TMM1, 12); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c new file mode 100644 index 0000000..4011043 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */ +/* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +#include <immintrin.h> + +__m512 a; +__m512bh b; +__m512h c; + +void TEST () +{ + a = _tile_cvtrowd2ps (1, 1); + a = _tile_cvtrowd2psi (1, 2); + b = _tile_cvtrowps2pbf16h (1, 3); + b = _tile_cvtrowps2pbf16hi (1, 4); + b = _tile_cvtrowps2pbf16l (1, 5); + b = _tile_cvtrowps2pbf16li (1, 6); + c = _tile_cvtrowps2phh (1, 7); + c = _tile_cvtrowps2phhi (1, 8); + c = _tile_cvtrowps2phl (1, 9); + c = _tile_cvtrowps2phli (1, 10); + a = _tile_movrow (1, 11); + a = _tile_movrowi (1, 12); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c new file mode 100644 index 0000000..cfd5644 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowd2ps-2.c @@ -0,0 +1,62 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowd2ps +void test_amx_avx512_cvtrowd2ps(); +#include "amx-helper.h" + +volatile __m512 cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWD2PS(EI, T) \ +__m512 \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowd2ps##EI (__tile *src, T __A) \ +{ \ + uint32_t *src_buf = (uint32_t *)src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, j; \ + __m512 res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + res[j] = 0; \ + else \ + res[j] = (float) (int) src_buf[row_index * N + j + row_chunk / 4]; \ + return res; \ +} + +DEFINE_TEST_CVTROWD2PS(e, unsigned) +DEFINE_TEST_CVTROWD2PS(i, const unsigned) + +#define TEST_CVTROWD2PS(X, Y, EI, T, INTRIN) \ +cal_dst = calc_cvtrowd2ps##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowd2ps() +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_int32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWD2PS (&src, a, e, unsigned, cvtrowd2ps); + TEST_CVTROWD2PS (&src, 1, i, const unsigned, cvtrowd2psi); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c new file mode 100644 index 0000000..dfd1d6a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c @@ -0,0 +1,82 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowps2pbf16 +void test_amx_avx512_cvtrowps2pbf16(); +#include "amx-helper.h" + +volatile __m512bh cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \ +__m512bh \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \ +{ \ + float *src_buf = (float *) src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, zeropos, pos, j, k; \ + __m512bh res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + if ((#HL) == "h") \ + { \ + zeropos = 0; \ + pos = 1; \ + } \ + else \ + { \ + zeropos = 1; \ + pos = 0; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + for (k = 0; k < 2; k++) \ + res[2 * j + k] = 0; \ + else \ + { \ + union16bh_uw tmp; \ + tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \ + res[2 * j + pos] = tmp.bf16; \ + res[2 * j + zeropos] = (__bf16) 0; \ + } \ + return res; \ +} + +DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned) +DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned) +DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned) + +#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM_BF16(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowps2pbf16 () +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_fp32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h); + TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l); + TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi); + TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c new file mode 100644 index 0000000..1fd28de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2ph-2.c @@ -0,0 +1,82 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowps2ph +void test_amx_avx512_cvtrowps2ph(); +#include "amx-helper.h" + +volatile __m512h cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWPS2PH(HL, EI, T) \ +__m512h \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowps2ph##HL##EI (__tile *src, T __A) \ +{ \ + float *src_buf = (float *) src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, zeropos, pos, j, k; \ + __m512h res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + if ((#HL) == "h") \ + { \ + zeropos = 0; \ + pos = 1; \ + } \ + else \ + { \ + zeropos = 1; \ + pos = 0; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + for (k = 0; k < 2; k++) \ + res[2 * j + k] = 0; \ + else \ + { \ + union16f_uw tmp; \ + tmp.u = make_f32_fp16 (src_buf[row_index * N + j + row_chunk / 4]); \ + res[2 * j + zeropos] = 0; \ + res[2 * j + pos] = tmp.f16; \ + } \ + return res; \ +} + +DEFINE_TEST_CVTROWPS2PH(h, e, unsigned) +DEFINE_TEST_CVTROWPS2PH(l, e, unsigned) +DEFINE_TEST_CVTROWPS2PH(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2PH(l, i, const unsigned) + +#define TEST_CVTROWPS2PH(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2ph##HL##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM_FP16(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowps2ph () +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_fp32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWPS2PH (&src, a, h, e, unsigned, cvtrowps2phh); + TEST_CVTROWPS2PH (&src, a, l, e, unsigned, cvtrowps2phl); + TEST_CVTROWPS2PH (&src, 1, h, i, const unsigned, cvtrowps2phhi); + TEST_CVTROWPS2PH (&src, 1, l, i, const unsigned, cvtrowps2phli); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c new file mode 100644 index 0000000..ea28d82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c @@ -0,0 +1,59 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_movrow +void test_amx_avx512_movrow(); +#include "amx-helper.h" + +int j, k; +volatile __m512 cal_dst, cmp_dst; + +#define TEST_MOVROW(X, Y, EI, T, INTRIN) \ +__m512 \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_movrow##EI (__tile *src, T __A) \ +{ \ + uint8_t *src_buf = (uint8_t *)src->buf; \ + int N = src->colsb; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk; \ + __m512 res; \ + if ((EI) == 'e') \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + union512_ub tmp; \ + for (j = 0; j < vl_bytes; j++) \ + if (j + row_chunk >= N) \ + tmp.u[j] = 0; \ + else \ + tmp.u[j] = src_buf[row_index * N + j + row_chunk]; \ + res = tmp.m; \ + return res; \ +} \ +cal_dst = calc_movrow##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM(cal_dst, cmp_dst); + +void test_amx_avx512_movrow() +{ + __tilecfg_u cfg; + __tile src; + unsigned a = 2; + char e = 'e', i = 'i'; + + init_tile_config (&cfg); + init_tile_reg_and_src (1, src); + + TEST_MOVROW (&src, a, e, unsigned, movrow); + TEST_MOVROW (&src, 1, i, const unsigned, movrowi); + +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 0852e53..b4ffc5f 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -89,6 +89,7 @@ extern void test_sm4 (void) __attribute__((__target__("sm4") extern void test_user_msr (void) __attribute__((__target__("usermsr"))); extern void test_avx10_2 (void) __attribute__((__target__("avx10.2"))); extern void test_avx10_2_512 (void) __attribute__((__target__("avx10.2-512"))); +extern void test_amx_avx512 (void) __attribute__((__target__("amx-avx512"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); @@ -179,6 +180,7 @@ extern void test_no_sm4 (void) __attribute__((__target__("no-sm extern void test_no_user_msr (void) __attribute__((__target__("no-usermsr"))); extern void test_no_avx10_2 (void) __attribute__((__target__("no-avx10.2"))); extern void test_no_avx10_2_512 (void) __attribute__((__target__("no-avx10.2-512"))); +extern void test_no_amx_avx512 (void) __attribute__((__target__("no-amx-avx512"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index fbc39c5..3349ce0 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index b32a5d75..9725cfe 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 4662c86..13e636c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 229e2f7..7c43c06 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f0e2054..76e0d8d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1082,6 +1082,6 @@ #define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512") #include <x86intrin.h> |