diff options
author | Haochen Jiang <haochen.jiang@intel.com> | 2024-09-02 15:00:22 +0800 |
---|---|---|
committer | Haochen Jiang <haochen.jiang@intel.com> | 2024-09-03 16:45:05 +0800 |
commit | 9b312595f9ac073f55d858b6f833097608b40bba (patch) | |
tree | 95ca7fe3221c856e4b7e723936d1e3d3af54b856 | |
parent | 14b65af6b400284a937e1d3be45579ee8cf8c32b (diff) | |
download | gcc-9b312595f9ac073f55d858b6f833097608b40bba.zip gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.gz gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.bz2 |
i386: Fix vfpclassph non-optimized intrin
The non-optimized version of the intrin has a typo in its mask type, which
causes the high bits of __mmask32 to be unexpectedly zeroed.
The test does not fail under O0 with current 1b since the testcase is
wrong. We need to include avx512-mask-type.h after SIZE is defined, or
it will always be __mmask8. That problem also happened in AVX10.2 testcases.
I will write a separate patch to fix that.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h
(_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32.
(_mm512_fpclass_ph_mask): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test.
-rw-r--r-- | gcc/config/i386/avx512fp16intrin.h | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c | 77 |
2 files changed, 79 insertions, 2 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index 1869a92..c3096b7 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm) #else #define _mm512_mask_fpclass_ph_mask(u, x, c) \ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ - (int) (c),(__mmask8)(u))) + (int) (c),(__mmask32)(u))) #define _mm512_fpclass_ph_mask(x, c) \ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ - (int) (c),(__mmask8)-1)) + (int) (c),(__mmask32)-1)) #endif /* __OPIMTIZE__ */ /* Intrinsics vgetexpph. */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c new file mode 100644 index 0000000..4739f12 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options "-O0 -mavx512fp16" } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "avx512f-helper.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +#ifndef __FPCLASSPH__ +#define __FPCLASSPH__ +int check_fp_class_hp (_Float16 src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || (((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +MASK_TYPE +CALC 
(_Float16 *s1, int imm) +{ + int i; + MASK_TYPE res = 0; + + for (i = 0; i < SIZE; i++) + if (check_fp_class_hp(s1[i], imm)) + res = res | (1 << i); + + return res; +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE res1, res2, res_ref = 0; + MASK_TYPE mask = MASK_VALUE; + + src.a[SIZE - 1] = NAN; + src.a[SIZE - 2] = 1.0 / 0.0; + for (i = 0; i < SIZE - 2; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); + res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF); + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((mask & res_ref) != res2) + abort (); +} |