From 814cbfc91a3c9f4286d13d04075287f6dac76e74 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 2 Jan 2025 16:55:34 +0800 Subject: i386: Change mnemonics from TCVTROWPS2PBF16[H,L] to TCVTROWPS2BF16[H,L] In ISE056, the mnemonics for TCVTROWPS2PBF16[H,L] have been changed to TCVTROWPS2BF16[H,L]. gcc/ChangeLog: * config/i386/amxavx512intrin.h (_tile_cvtrowps2pbf16h_internal): Rename to... (_tile_cvtrowps2bf16h_internal): ...this. (_tile_cvtrowps2pbf16hi_internal): Rename to... (_tile_cvtrowps2bf16hi_internal): ...this. (_tile_cvtrowps2pbf16l_internal): Rename to... (_tile_cvtrowps2bf16l_internal): ...this. (_tile_cvtrowps2pbf16li_internal): Rename to... (_tile_cvtrowps2bf16li_internal): ...this. (_tile_cvtrowps2pbf16h): Rename to... (_tile_cvtrowps2bf16h): ...this. (_tile_cvtrowps2pbf16hi): Rename to... (_tile_cvtrowps2bf16hi): ...this. (_tile_cvtrowps2pbf16l): Rename to... (_tile_cvtrowps2bf16l): ...this. (_tile_cvtrowps2pbf16li): Rename to... (_tile_cvtrowps2bf16li): ...this. gcc/testsuite/ChangeLog: * gcc.target/i386/amxavx512-asmatt-1.c: Adjust intrin call. * gcc.target/i386/amxavx512-asmintel-1.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Rename to... * gcc.target/i386/amxavx512-cvtrowps2bf16-2.c: ...this. Rename test functions. 
--- gcc/config/i386/amxavx512intrin.h | 32 ++++----- gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c | 12 ++-- .../gcc.target/i386/amxavx512-asmintel-1.c | 12 ++-- .../gcc.target/i386/amxavx512-cvtrowps2bf16-2.c | 82 ++++++++++++++++++++++ .../gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c | 82 ---------------------- 5 files changed, 110 insertions(+), 110 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c (limited to 'gcc') diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h index 59d1429..ab53625 100644 --- a/gcc/config/i386/amxavx512intrin.h +++ b/gcc/config/i386/amxavx512intrin.h @@ -53,38 +53,38 @@ dst; \ }) -#define _tile_cvtrowps2pbf16h_internal(src,A) \ +#define _tile_cvtrowps2bf16h_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16hi_internal(src,imm) \ +#define _tile_cvtrowps2bf16hi_internal(src,imm) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}" \ + ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", "#imm"}" \ : "=v" (dst) :); \ dst; \ }) -#define _tile_cvtrowps2pbf16l_internal(src,A) \ +#define _tile_cvtrowps2bf16l_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16li_internal(src,imm) \ +#define _tile_cvtrowps2bf16li_internal(src,imm) \ ({ \ __m512bh dst; \ __asm__ volatile \ - 
("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}" \ + ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", "#imm"}" \ : "=v" (dst) :); \ dst; \ }) @@ -149,17 +149,17 @@ #define _tile_cvtrowd2psi(src,imm) \ _tile_cvtrowd2psi_internal (src,imm) -#define _tile_cvtrowps2pbf16h(src,A) \ - _tile_cvtrowps2pbf16h_internal (src,A) +#define _tile_cvtrowps2bf16h(src,A) \ + _tile_cvtrowps2bf16h_internal (src,A) -#define _tile_cvtrowps2pbf16hi(src,imm) \ - _tile_cvtrowps2pbf16hi_internal (src,imm) +#define _tile_cvtrowps2bf16hi(src,imm) \ + _tile_cvtrowps2bf16hi_internal (src,imm) -#define _tile_cvtrowps2pbf16l(src,A) \ - _tile_cvtrowps2pbf16l_internal (src,A) +#define _tile_cvtrowps2bf16l(src,A) \ + _tile_cvtrowps2bf16l_internal (src,A) -#define _tile_cvtrowps2pbf16li(src,imm) \ - _tile_cvtrowps2pbf16li_internal (src,imm) +#define _tile_cvtrowps2bf16li(src,imm) \ + _tile_cvtrowps2bf16li_internal (src,imm) #define _tile_cvtrowps2phh(src,A) \ _tile_cvtrowps2phh_internal (src,A) diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c index 497218d..885f864 100644 --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c @@ -1,8 +1,8 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */ @@ -18,10 +18,10 @@ void TEST () { a = _tile_cvtrowd2ps (TMM1, 1); a = _tile_cvtrowd2psi (TMM1, 2); - b = _tile_cvtrowps2pbf16h (TMM1, 3); - b = _tile_cvtrowps2pbf16hi (TMM1, 4); - b = _tile_cvtrowps2pbf16l (TMM1, 5); - b = _tile_cvtrowps2pbf16li (TMM1, 6); + b = _tile_cvtrowps2bf16h (TMM1, 3); + b = _tile_cvtrowps2bf16hi (TMM1, 4); + b = _tile_cvtrowps2bf16l (TMM1, 5); + b = _tile_cvtrowps2bf16li (TMM1, 6); c = _tile_cvtrowps2phh (TMM1, 7); c = _tile_cvtrowps2phhi (TMM1, 8); c = _tile_cvtrowps2phl (TMM1, 9); diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c index 4011043..57c3705 100644 --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c @@ -2,8 +2,8 @@ /* { dg-require-effective-target masm_intel } */ /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */ /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ 
+/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ @@ -17,10 +17,10 @@ void TEST () { a = _tile_cvtrowd2ps (1, 1); a = _tile_cvtrowd2psi (1, 2); - b = _tile_cvtrowps2pbf16h (1, 3); - b = _tile_cvtrowps2pbf16hi (1, 4); - b = _tile_cvtrowps2pbf16l (1, 5); - b = _tile_cvtrowps2pbf16li (1, 6); + b = _tile_cvtrowps2bf16h (1, 3); + b = _tile_cvtrowps2bf16hi (1, 4); + b = _tile_cvtrowps2bf16l (1, 5); + b = _tile_cvtrowps2bf16li (1, 6); c = _tile_cvtrowps2phh (1, 7); c = _tile_cvtrowps2phhi (1, 8); c = _tile_cvtrowps2phl (1, 9); diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c new file mode 100644 index 0000000..acd5f76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c @@ -0,0 +1,82 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-require-effective-target amx_avx512 } */ +/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ +#define AMX_AVX512 +#define DO_TEST test_amx_avx512_cvtrowps2bf16 +void test_amx_avx512_cvtrowps2bf16(); +#include "amx-helper.h" + +volatile __m512bh cal_dst, cmp_dst; + +#define DEFINE_TEST_CVTROWPS2BF16(HL, EI, T) \ +__m512bh \ +__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ +calc_cvtrowps2bf16##HL##EI (__tile *src, T __A) \ +{ \ + float *src_buf = (float *) src->buf; \ + int N = src->colsb / 4; \ + int vl = 512; \ + int vl_bytes = vl >> 3; \ + int row_index, row_chunk, zeropos, pos, j, k; \ + __m512bh res; \ + if ((#EI) == "e") \ + { \ + row_index = (__A) & 0xffff; \ + row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ + } \ + else \ + { \ + row_index = (__A) & 0x3f; \ + row_chunk = ((__A) >> 6) * vl_bytes; \ + } \ + if ((#HL) == "h") \ + { \ + zeropos = 0; \ + pos = 1; \ + } \ + else \ + { \ + zeropos = 1; \ + pos = 0; \ + } \ + for (j = 0; j < vl_bytes / 4; j++) \ + if (j + row_chunk / 4 >= N) \ + for (k = 0; k < 2; k++) \ + res[2 * j + k] = 0; \ + else \ + { \ + union16bh_uw tmp; \ + tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \ + res[2 * j + pos] = tmp.bf16; \ + res[2 * j + zeropos] = (__bf16) 0; \ + } \ + return res; \ +} + +DEFINE_TEST_CVTROWPS2BF16(h, e, unsigned) +DEFINE_TEST_CVTROWPS2BF16(l, e, unsigned) +DEFINE_TEST_CVTROWPS2BF16(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2BF16(l, i, const unsigned) + +#define TEST_CVTROWPS2BF16(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2bf16##HL##EI (X, Y); \ +cmp_dst = _tile_##INTRIN (1, Y); \ +COMPARE_ZMM_BF16(cal_dst, cmp_dst); + +void test_amx_avx512_cvtrowps2bf16 () +{ + __tilecfg_u cfg; + __tile src; + uint8_t tmp_dst_buf[1024]; + unsigned a = 2; + + init_fp32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); + + TEST_CVTROWPS2BF16 (&src, a, h, e, unsigned, 
cvtrowps2bf16h); + TEST_CVTROWPS2BF16 (&src, a, l, e, unsigned, cvtrowps2bf16l); + TEST_CVTROWPS2BF16 (&src, 1, h, i, const unsigned, cvtrowps2bf16hi); + TEST_CVTROWPS2BF16 (&src, 1, l, i, const unsigned, cvtrowps2bf16li); +} diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c deleted file mode 100644 index dfd1d6a..0000000 --- a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c +++ /dev/null @@ -1,82 +0,0 @@ -/* { dg-do run { target { ! ia32 } } } */ -/* { dg-require-effective-target amx_avx512 } */ -/* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ -#define AMX_AVX512 -#define DO_TEST test_amx_avx512_cvtrowps2pbf16 -void test_amx_avx512_cvtrowps2pbf16(); -#include "amx-helper.h" - -volatile __m512bh cal_dst, cmp_dst; - -#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \ -__m512bh \ -__attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ -calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \ -{ \ - float *src_buf = (float *) src->buf; \ - int N = src->colsb / 4; \ - int vl = 512; \ - int vl_bytes = vl >> 3; \ - int row_index, row_chunk, zeropos, pos, j, k; \ - __m512bh res; \ - if ((#EI) == "e") \ - { \ - row_index = (__A) & 0xffff; \ - row_chunk = (((__A) >> 16) & 0xffff) * vl_bytes; \ - } \ - else \ - { \ - row_index = (__A) & 0x3f; \ - row_chunk = ((__A) >> 6) * vl_bytes; \ - } \ - if ((#HL) == "h") \ - { \ - zeropos = 0; \ - pos = 1; \ - } \ - else \ - { \ - zeropos = 1; \ - pos = 0; \ - } \ - for (j = 0; j < vl_bytes / 4; j++) \ - if (j + row_chunk / 4 >= N) \ - for (k = 0; k < 2; k++) \ - res[2 * j + k] = 0; \ - else \ - { \ - union16bh_uw tmp; \ - tmp.u = make_f32_bf16 (src_buf[row_index * N + j + row_chunk / 4]); \ - res[2 * j + pos] = tmp.bf16; \ - res[2 * j + zeropos] = (__bf16) 0; \ - } \ - return res; \ -} - -DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned) -DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned) -DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned) 
-DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned) - -#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \ -cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \ -cmp_dst = _tile_##INTRIN (1, Y); \ -COMPARE_ZMM_BF16(cal_dst, cmp_dst); - -void test_amx_avx512_cvtrowps2pbf16 () -{ - __tilecfg_u cfg; - __tile src; - uint8_t tmp_dst_buf[1024]; - unsigned a = 2; - - init_fp32_max_tile_buffer (tmp_dst_buf); - - init_tile_config (&cfg); - init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); - - TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h); - TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l); - TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi); - TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li); -} -- cgit v1.1