i386: Fix vfpclassph non-optimized intrin

The non-optimized version of the intrinsic had a typo in the mask type, which causes the high bits of __mmask32 to be unexpectedly zeroed. The test does not fail under -O0 with the current 1b test since the testcase is wrong. We need to include avx512f-mask-type.h after SIZE is defined, or the mask type will always be __mmask8. That problem also happens in the AVX10.2 testcases. I will write a separate patch to fix that. gcc/ChangeLog: * config/i386/avx512fp16intrin.h (_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32. (_mm512_fpclass_ph_mask): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test.
author: Haochen Jiang <haochen.jiang@intel.com> 2024-09-02 15:00:22 +0800
committer: Haochen Jiang <haochen.jiang@intel.com> 2024-09-03 16:45:05 +0800
commit: 9b312595f9ac073f55d858b6f833097608b40bba (patch)
tree: 95ca7fe3221c856e4b7e723936d1e3d3af54b856
parent: 14b65af6b400284a937e1d3be45579ee8cf8c32b (diff)
download: gcc-9b312595f9ac073f55d858b6f833097608b40bba.zip
gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.gz
gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.bz2
2 files changed, 79 insertions, 2 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 1869a92..c3096b7 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
 #else
 #define _mm512_mask_fpclass_ph_mask(u, x, c)				\
   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
-						 (int) (c),(__mmask8)(u)))
+						 (int) (c),(__mmask32)(u)))
 
 #define _mm512_fpclass_ph_mask(x, c)                                    \
   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
-						 (int) (c),(__mmask8)-1))
+						 (int) (c),(__mmask32)-1))
 #endif /* __OPIMTIZE__ */
 
 /* Intrinsics vgetexpph.  */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
new file mode 100644
index 0000000..4739f12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512fp16" } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512FP16
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#ifndef __FPCLASSPH__
+#define __FPCLASSPH__
+/* Scalar reference: return 1 if SRC is in any class selected by IMM bits 0-7.  */
+int check_fp_class_hp (_Float16 src, int imm)
+{
+  int qNaN_res = isnan (src);
+  int sNaN_res = isnan (src);
+  /* +0.0 and -0.0 compare equal, so the zero classes must check the sign.  */
+  int Pzero_res = (src == 0.0) && !signbit (src);
+  int Nzero_res = (src == 0.0) && signbit (src);
+  int PInf_res = (isinf (src) == 1);
+  int NInf_res = (isinf (src) == -1);
+  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+  int FinNeg_res = __builtin_finite (src) && (src < 0);
+
+  return (((imm & 1) && qNaN_res)
+	  || (((imm >> 1) & 1) && Pzero_res)
+	  || (((imm >> 2) & 1) && Nzero_res)
+	  || (((imm >> 3) & 1) && PInf_res)
+	  || (((imm >> 4) & 1) && NInf_res)
+	  || (((imm >> 5) & 1) && Denorm_res)
+	  || (((imm >> 6) & 1) && FinNeg_res)
+	  || (((imm >> 7) & 1) && sNaN_res));
+}
+#endif
+
+/* Reference mask: bit I is set when s1[I] is in a class selected by IMM.  */
+MASK_TYPE
+CALC (_Float16 *s1, int imm)
+{
+  int i;
+  MASK_TYPE res = 0;
+
+  for (i = 0; i < SIZE; i++)
+    if (check_fp_class_hp (s1[i], imm))
+      res |= (MASK_TYPE) 1 << i;  /* int 1 << 31 would overflow.  */
+  return res;
+}
+/* Check both fpclass intrinsics against CALC with all class bits (0xFF) set.  */
+void
+TEST (void)
+{
+  int i;
+  UNION_TYPE (AVX512F_LEN, h) src;
+  MASK_TYPE res1, res2, res_ref = 0;
+  MASK_TYPE mask = MASK_VALUE;
+  /* Top two lanes hold NaN and 1.0 / 0.0; the rest are filled by the loop.  */
+  src.a[SIZE - 1] = NAN;
+  src.a[SIZE - 2] = 1.0 / 0.0;
+  for (i = 0; i < SIZE - 2; i++)
+    {
+      src.a[i] = -24.43 + 0.6 * i;
+    }
+  /* res1 is the unmasked form; res2 passes MASK as the first argument.  */
+  res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF);
+  res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF);
+  /* Scalar reference computed over the same elements.  */
+  res_ref = CALC (src.a, 0xFF);
+  /* The unmasked result must equal the reference.  */
+  if (res_ref != res1)
+    abort ();
+  /* The masked result must equal the reference ANDed with MASK.  */
+  if ((mask & res_ref) != res2)
+    abort ();
+}
author	Haochen Jiang <haochen.jiang@intel.com>	2024-09-02 15:00:22 +0800
committer	Haochen Jiang <haochen.jiang@intel.com>	2024-09-03 16:45:05 +0800
commit	9b312595f9ac073f55d858b6f833097608b40bba (patch)
tree	95ca7fe3221c856e4b7e723936d1e3d3af54b856
parent	14b65af6b400284a937e1d3be45579ee8cf8c32b (diff)
download	gcc-9b312595f9ac073f55d858b6f833097608b40bba.zip gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.gz gcc-9b312595f9ac073f55d858b6f833097608b40bba.tar.bz2